Use XHS video tab channel

This commit is contained in:
wangshaoqing 2026-05-27 17:57:02 +08:00
parent a43168d5a9
commit b13f170ce8
3 changed files with 7 additions and 5 deletions

View File

@@ -74,6 +74,7 @@ pip install requests DrissionPage
 ./.venv/bin/python XHS.py --max-videos 20 --max-runtime 600
 # 长任务队列模式:适合下载大量视频,可中断后继续
+# video-channel 对应网页顶部“视频”频道,通常比关键词搜索更适合大量下载
 ./.venv/bin/python XHS.py \
 --source video-channel \
 --target-videos 1000 \

9
XHS.py
View File

@@ -17,7 +17,7 @@ from urllib.parse import urljoin
 from dataclasses import replace
 DEFAULT_EXPLORE_URL = "https://www.xiaohongshu.com/explore"
-DEFAULT_VIDEO_CHANNEL_URL = "https://www.xiaohongshu.com/explore?channel_id=video"
+DEFAULT_VIDEO_CHANNEL_URL = "https://www.xiaohongshu.com/explore?channel_id=homefeed.video_v3"
 DEFAULT_BROWSER_PORT = 9223
 DEFAULT_OUTPUT_DIR = Path("video")
 LISTEN_TARGET = "/api/sns/web/v1/feed"
@@ -874,14 +874,15 @@ def run_queue_download(
 else:
 page.get(source_url)
 human_pause(human_settings)
-note_limit = max(50, target_videos * 3 if source == "search" else target_videos * 2)
-browse_rounds = 8 if source == "search" else 2
+video_card_source = source in {"search", "video-channel"}
+note_limit = max(50, target_videos * 3 if video_card_source else target_videos * 2)
+browse_rounds = 8 if video_card_source else 2
 note_urls = collect_note_urls_with_browse(
 page,
 limit=note_limit,
 human_settings=human_settings,
 rounds=browse_rounds,
-video_only=source == "search",
+video_only=video_card_source,
 )
 records = merge_note_urls_into_queue(records, note_urls, source=source)
 save_queue(queue_file, records)

View File

@@ -296,7 +296,7 @@ class XhsModuleTests(unittest.TestCase):
 self.assertEqual(module.build_source_url("explore"), module.DEFAULT_EXPLORE_URL)
 self.assertEqual(
 module.build_source_url("video-channel"),
-"https://www.xiaohongshu.com/explore?channel_id=video",
+"https://www.xiaohongshu.com/explore?channel_id=homefeed.video_v3",
 )
 def test_build_source_url_supports_encoded_search_keyword(self) -> None: