feat: add human-like random scrolling to avoid detection

This commit is contained in:
wangshaoqing 2026-05-06 19:12:01 +08:00
parent 46499446b2
commit 4fb4131217
2 changed files with 16 additions and 4 deletions

View File

@ -12,6 +12,7 @@ from __future__ import annotations
import argparse
import json
import random
import re
import socket
import sys
@ -296,6 +297,14 @@ def scroll_to_next_page(page: Any) -> None:
time.sleep(2)
def human_like_scroll(
    page: Any,
    *,
    min_distance: int = 300,
    max_distance: int = 800,
    min_pause: float = 1.5,
    max_pause: float = 4.0,
) -> None:
    """Simulate human scrolling: random scroll distance, then a random pause.

    Args:
        page: Object exposing ``run_js(script)``; the scroll is performed by
            passing a ``window.scrollBy`` snippet to it.
        min_distance: Smallest vertical scroll step (inclusive).
        max_distance: Largest vertical scroll step (inclusive).
        min_pause: Lower bound, in seconds, of the pause after scrolling.
        max_pause: Upper bound, in seconds, of the pause after scrolling.

    Raises:
        ValueError: If ``min_distance`` is greater than ``max_distance``
            (raised by ``random.randint``).
    """
    # Pick an integer step so the generated JS contains a plain number.
    scroll_distance = random.randint(min_distance, max_distance)
    page.run_js(f"window.scrollBy(0, {scroll_distance});")

    # Pause for a non-constant amount of time before the caller continues.
    time.sleep(random.uniform(min_pause, max_pause))
def download_video(
requests_module: Any,
headers: dict[str, str],
@ -417,7 +426,7 @@ def collect_recommendations(
if consecutive_empty >= max_consecutive_empty:
print("[INFO] 连续多次未获取到新数据,结束抓取。")
break
scroll_to_next_page(page)
human_like_scroll(page)
continue
try:
@ -428,14 +437,14 @@ def collect_recommendations(
consecutive_empty += 1
if consecutive_empty >= max_consecutive_empty:
break
scroll_to_next_page(page)
human_like_scroll(page)
continue
if not items:
consecutive_empty += 1
if consecutive_empty >= max_consecutive_empty:
break
scroll_to_next_page(page)
human_like_scroll(page)
continue
consecutive_empty = 0
@ -476,7 +485,7 @@ def collect_recommendations(
if consecutive_empty >= max_consecutive_empty:
break
scroll_to_next_page(page)
human_like_scroll(page)
return downloaded

View File

@ -54,6 +54,9 @@ class FakeRuntimePage:
self.url = url
def run_js(self, script):
    """Accept scroll scripts silently; reject any other script.

    Both the absolute (``window.scrollTo``) and the relative
    (``window.scrollBy``) scroll calls are allowed, so the old
    scroll_to_next_page and the new human_like_scroll both work.

    Raises:
        AssertionError: If ``script`` contains neither scroll call.
    """
    allowed_markers = ("window.scrollTo", "window.scrollBy")
    if not any(marker in script for marker in allowed_markers):
        raise AssertionError(f"unexpected scroll script: {script}")
    return None