def human_like_scroll(
    page: Any,
    *,
    min_distance: int = 300,
    max_distance: int = 800,
    min_pause: float = 1.5,
    max_pause: float = 4.0,
) -> None:
    """Scroll the page down by a random amount, then pause for a random time.

    The randomness in both the scroll distance and the pause is meant to make
    the scrolling look less mechanical than a fixed step with a fixed delay.

    Args:
        page: Object exposing ``run_js(script)``; the script is executed
            through that method.
        min_distance: Smallest scroll step in pixels (inclusive).
        max_distance: Largest scroll step in pixels (inclusive).
        min_pause: Lower bound, in seconds, of the pause after scrolling.
        max_pause: Upper bound, in seconds, of the pause after scrolling.

    Raises:
        ValueError: If ``min_distance > max_distance`` (from
            ``random.randint``) or if the drawn pause is negative (from
            ``time.sleep``).
    """
    # NOTE(review): the callers switched here from scroll_to_next_page();
    # a relative step of a few hundred pixels may not reach whatever point
    # triggers loading of the next batch -- confirm against the live page.
    scroll_distance = random.randint(min_distance, max_distance)

    # scroll_distance is always an int, so interpolating it into the script
    # cannot inject arbitrary JavaScript.
    page.run_js(f"window.scrollBy(0, {scroll_distance});")

    # The bounds are parameters so that tests (or impatient callers) can pass
    # 0.0 and avoid blocking for several seconds on every scroll.
    time.sleep(random.uniform(min_pause, max_pause))