feat: add human-like random scrolling to avoid detection
This commit is contained in:
parent
46499446b2
commit
4fb4131217
17
Douyin.py
17
Douyin.py
@ -12,6 +12,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
|
import random
|
||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
@ -296,6 +297,14 @@ def scroll_to_next_page(page: Any) -> None:
|
|||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
|
||||||
|
|
||||||
|
def human_like_scroll(
    page: Any,
    *,
    distance_range: tuple[int, int] = (300, 800),
    pause_range: tuple[float, float] = (1.5, 4.0),
) -> None:
    """Scroll the page down by a random distance, then pause for a random time.

    Randomising both the distance and the pause makes the scrolling pattern
    less regular than a fixed "jump and wait" step.

    Args:
        page: Object exposing ``run_js(script)``; the script is executed in
            the page to perform the scroll.
        distance_range: Inclusive ``(min, max)`` scroll distance in pixels.
            Both bounds must be integers (``random.randint`` requirement).
        pause_range: ``(min, max)`` pause in seconds after scrolling.

    Raises:
        ValueError: If a range is reversed or the pause could be negative.
    """
    min_distance, max_distance = distance_range
    min_pause, max_pause = pause_range

    # Validate up front so a bad range never leaves the page half-advanced
    # (scrolled, but failing on the sleep afterwards).
    if min_distance > max_distance:
        raise ValueError(f"invalid distance_range: {distance_range!r}")
    if min_pause < 0 or min_pause > max_pause:
        raise ValueError(f"invalid pause_range: {pause_range!r}")

    # Draw order (distance first, then pause) matches the original so a
    # seeded ``random`` produces the same sequence with default arguments.
    scroll_distance = random.randint(min_distance, max_distance)
    page.run_js(f"window.scrollBy(0, {scroll_distance});")

    time.sleep(random.uniform(min_pause, max_pause))
|
||||||
|
|
||||||
|
|
||||||
def download_video(
|
def download_video(
|
||||||
requests_module: Any,
|
requests_module: Any,
|
||||||
headers: dict[str, str],
|
headers: dict[str, str],
|
||||||
@ -417,7 +426,7 @@ def collect_recommendations(
|
|||||||
if consecutive_empty >= max_consecutive_empty:
|
if consecutive_empty >= max_consecutive_empty:
|
||||||
print("[INFO] 连续多次未获取到新数据,结束抓取。")
|
print("[INFO] 连续多次未获取到新数据,结束抓取。")
|
||||||
break
|
break
|
||||||
scroll_to_next_page(page)
|
human_like_scroll(page)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -428,14 +437,14 @@ def collect_recommendations(
|
|||||||
consecutive_empty += 1
|
consecutive_empty += 1
|
||||||
if consecutive_empty >= max_consecutive_empty:
|
if consecutive_empty >= max_consecutive_empty:
|
||||||
break
|
break
|
||||||
scroll_to_next_page(page)
|
human_like_scroll(page)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if not items:
|
if not items:
|
||||||
consecutive_empty += 1
|
consecutive_empty += 1
|
||||||
if consecutive_empty >= max_consecutive_empty:
|
if consecutive_empty >= max_consecutive_empty:
|
||||||
break
|
break
|
||||||
scroll_to_next_page(page)
|
human_like_scroll(page)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
consecutive_empty = 0
|
consecutive_empty = 0
|
||||||
@ -476,7 +485,7 @@ def collect_recommendations(
|
|||||||
if consecutive_empty >= max_consecutive_empty:
|
if consecutive_empty >= max_consecutive_empty:
|
||||||
break
|
break
|
||||||
|
|
||||||
scroll_to_next_page(page)
|
human_like_scroll(page)
|
||||||
|
|
||||||
return downloaded
|
return downloaded
|
||||||
|
|
||||||
|
|||||||
@ -54,6 +54,9 @@ class FakeRuntimePage:
|
|||||||
self.url = url
|
self.url = url
|
||||||
|
|
||||||
def run_js(self, script):
|
def run_js(self, script):
|
||||||
|
# Allow both old scroll_to_next_page and new human_like_scroll
|
||||||
|
if "window.scrollTo" in script or "window.scrollBy" in script:
|
||||||
|
return
|
||||||
raise AssertionError(f"unexpected scroll script: {script}")
|
raise AssertionError(f"unexpected scroll script: {script}")
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user