diff --git a/Douyin.py b/Douyin.py index f696cce..6440803 100644 --- a/Douyin.py +++ b/Douyin.py @@ -12,6 +12,7 @@ from __future__ import annotations import argparse import json +import random import re import socket import sys @@ -296,6 +297,14 @@ def scroll_to_next_page(page: Any) -> None: time.sleep(2) +def human_like_scroll(page: Any) -> None: + """Simulate human scrolling: scroll by a random distance, then pause for a random time.""" + scroll_distance = random.randint(300, 800) + page.run_js(f"window.scrollBy(0, {scroll_distance});") + sleep_time = random.uniform(1.5, 4.0) + time.sleep(sleep_time) + + def download_video( requests_module: Any, headers: dict[str, str], @@ -417,7 +426,7 @@ def collect_recommendations( if consecutive_empty >= max_consecutive_empty: print("[INFO] 连续多次未获取到新数据,结束抓取。") break - scroll_to_next_page(page) + human_like_scroll(page) continue try: @@ -428,14 +437,14 @@ def collect_recommendations( consecutive_empty += 1 if consecutive_empty >= max_consecutive_empty: break - scroll_to_next_page(page) + human_like_scroll(page) continue if not items: consecutive_empty += 1 if consecutive_empty >= max_consecutive_empty: break - scroll_to_next_page(page) + human_like_scroll(page) continue consecutive_empty = 0 @@ -476,7 +485,7 @@ def collect_recommendations( if consecutive_empty >= max_consecutive_empty: break - scroll_to_next_page(page) + human_like_scroll(page) return downloaded diff --git a/test_douyin.py b/test_douyin.py index 99d9dcb..c4fee10 100644 --- a/test_douyin.py +++ b/test_douyin.py @@ -54,6 +54,9 @@ class FakeRuntimePage: self.url = url def run_js(self, script): + # Allow both old scroll_to_next_page and new human_like_scroll + if "window.scrollTo" in script or "window.scrollBy" in script: + return raise AssertionError(f"unexpected scroll script: {script}")