feat: add human-like recommendation scrolling
This commit is contained in:
parent
452f14da69
commit
d0f6c5e5ab
170
Douyin.py
170
Douyin.py
@ -30,6 +30,7 @@ DEFAULT_BROWSER_PORT = 9223
|
|||||||
# Substrings of the API URLs whose responses carry video ("aweme") data.
LISTEN_TARGET = "web/aweme/post/"
RECOMMENDATION_LISTEN_TARGET = "aweme/v2/web/module/feed/"
SINGLE_VIDEO_LISTEN_TARGET = "web/aweme/detail/"

# Upper bound, in UTF-8 bytes, for generated file names.
# NOTE(review): presumably chosen to stay below the common 255-byte
# per-component file-name limit -- confirm.
MAX_FILENAME_BYTES = 240

# Characters replaced with "_" when turning a title/author into a file name.
INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')

# Site root or "/jingxuan", optionally followed by a query string.
# The path segment is nested under the slash so that a host directly followed
# by "jingxuan" (e.g. "https://www.douyin.comjingxuan") no longer matches.
RECOMMENDATION_URL_PATTERN = re.compile(
    r"^https?://www\.douyin\.com(?:/(?:jingxuan)?)?(?:\?.*)?$"
)

# "/user/<id>" with an optional query string.
CREATOR_URL_PATTERN = re.compile(r"^https?://www\.douyin\.com/user/[^/?#]+(?:\?.*)?$")
|
||||||
@ -45,11 +46,48 @@ class ResolvedTarget:
|
|||||||
aweme_id: str | None = None
|
aweme_id: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class ScrollSettings:
    """Immutable tuning knobs for recommendation-feed scrolling.

    Raises:
        ValueError: if a range is inverted or a value is out of bounds, so a
            bad configuration fails at construction time rather than later
            inside the random number generator.
    """

    # Scrolling strategy name; the CLI currently only offers "human".
    mode: str = "human"
    # Bounds (seconds) for the randomly drawn pauses after scrolling.
    min_wait: float = 2.0
    max_wait: float = 8.0
    # Chance, in [0, 1], that a scroll step includes a small scroll back up.
    reverse_scroll_probability: float = 0.2
    # Maximum collection time in seconds; 0 disables the limit.
    max_runtime: float = 600.0
    # Bounds (pixels) for the main downward scroll.
    min_scroll: int = 300
    max_scroll: int = 900
    # Bounds (pixels) for the optional upward scroll.
    min_reverse_scroll: int = 80
    max_reverse_scroll: int = 250

    def __post_init__(self) -> None:
        if self.min_wait < 0:
            raise ValueError("min_wait must be >= 0")
        if self.max_wait < self.min_wait:
            raise ValueError("max_wait must be >= min_wait")
        if not 0 <= self.reverse_scroll_probability <= 1:
            raise ValueError("reverse_scroll_probability must be within [0, 1]")
        if self.max_runtime < 0:
            raise ValueError("max_runtime must be >= 0")
        if not 0 <= self.min_scroll <= self.max_scroll:
            raise ValueError("scroll range must satisfy 0 <= min_scroll <= max_scroll")
        if not 0 <= self.min_reverse_scroll <= self.max_reverse_scroll:
            raise ValueError(
                "reverse scroll range must satisfy "
                "0 <= min_reverse_scroll <= max_reverse_scroll"
            )
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class HumanScrollPlan:
    """One pre-computed scroll step: distances in pixels, pauses in seconds."""

    # Pixels to scroll down first.
    down_distance: int
    # Pause after the downward scroll.
    down_wait: float
    # Pixels to scroll back up; 0 means no reverse scroll in this step.
    reverse_distance: int = 0
    # Pause after the reverse scroll.
    reverse_wait: float = 0.0
    # Final pause at the end of the step; 0 skips it.
    settle_wait: float = 0.0
|
||||||
|
|
||||||
|
|
||||||
def sanitize_filename(value: str, fallback: str = "untitled") -> str:
    """Make *value* usable as a file-name component.

    Every character matched by ``INVALID_FILENAME_CHARS`` becomes ``_``, then
    leading/trailing spaces, dots and underscores are stripped.  If nothing is
    left, *fallback* is returned instead.
    """
    cleaned = INVALID_FILENAME_CHARS.sub("_", value).strip(" ._")
    return cleaned or fallback
|
||||||
|
|
||||||
|
|
||||||
|
def truncate_utf8_bytes(value: str, max_bytes: int) -> str:
    """Shorten *value* so its UTF-8 encoding fits in *max_bytes* bytes.

    A string that already fits is returned unchanged.  Otherwise the text is
    cut on a character boundary (never in the middle of a multi-byte
    sequence) and trailing spaces, dots and underscores left by the cut are
    stripped.  A non-positive *max_bytes* yields ``""`` for non-empty input.
    """
    encoded = value.encode("utf-8")
    if len(encoded) <= max_bytes:
        return value

    # Slice the bytes once instead of growing a string character by character;
    # errors="ignore" drops the partial multi-byte sequence the slice may leave
    # at the end.  max(0, ...) keeps a negative budget from slicing from the
    # right-hand side.
    clipped = encoded[: max(0, max_bytes)].decode("utf-8", errors="ignore")
    return clipped.rstrip(" ._")
|
||||||
|
|
||||||
|
|
||||||
def is_recommendation_url(value: str) -> bool:
    """Return True if *value*, ignoring surrounding whitespace, matches
    ``RECOMMENDATION_URL_PATTERN``."""
    return bool(RECOMMENDATION_URL_PATTERN.match(value.strip()))
|
||||||
|
|
||||||
@ -181,11 +219,20 @@ def build_output_path(
|
|||||||
author_name: str | None = None,
|
author_name: str | None = None,
|
||||||
) -> Path:
|
) -> Path:
|
||||||
safe_title = sanitize_filename(title, fallback="untitled")
|
safe_title = sanitize_filename(title, fallback="untitled")
|
||||||
|
suffix = f"-{video_id}.mp4"
|
||||||
if author_name:
|
if author_name:
|
||||||
safe_author = sanitize_filename(author_name, fallback="unknown")
|
safe_author = sanitize_filename(author_name, fallback="unknown")
|
||||||
filename = f"[{safe_author}]{safe_title}-{video_id}.mp4"
|
prefix = f"[{safe_author}]"
|
||||||
else:
|
else:
|
||||||
filename = f"{safe_title}-{video_id}.mp4"
|
prefix = ""
|
||||||
|
|
||||||
|
title_budget = MAX_FILENAME_BYTES - len(prefix.encode("utf-8")) - len(suffix.encode("utf-8"))
|
||||||
|
if title_budget < 1:
|
||||||
|
prefix_budget = MAX_FILENAME_BYTES - len(suffix.encode("utf-8")) - 1
|
||||||
|
prefix = truncate_utf8_bytes(prefix, max(1, prefix_budget))
|
||||||
|
title_budget = MAX_FILENAME_BYTES - len(prefix.encode("utf-8")) - len(suffix.encode("utf-8"))
|
||||||
|
|
||||||
|
filename = f"{prefix}{truncate_utf8_bytes(safe_title, max(1, title_budget))}{suffix}"
|
||||||
return output_dir / filename
|
return output_dir / filename
|
||||||
|
|
||||||
|
|
||||||
@ -319,7 +366,8 @@ def create_page(chromium_page_cls: Any, chromium_options_cls: Any, browser_port:
|
|||||||
|
|
||||||
def wait_for_aweme_packet(page: Any, timeout: int) -> Any | None:
    """Wait for the next packet captured by ``page.listen``.

    Args:
        page: Object exposing ``listen.wait(timeout=...)``.
        timeout: Value forwarded to ``listen.wait``.

    Returns:
        The captured packet, or ``None`` when the listener returns a falsy
        value (for example ``False``) or raises.
    """
    try:
        packet = page.listen.wait(timeout=timeout)
    except Exception as exc:
        # Best effort: a failed wait is reported and treated as "no data".
        print(f"[WARN] 等待接口数据超时或失败: {exc}")
        return None
    # Normalise falsy listener results to None so callers only need one check.
    return packet or None
|
||||||
@ -330,12 +378,52 @@ def scroll_to_next_page(page: Any) -> None:
|
|||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
|
||||||
|
|
||||||
def human_like_scroll(page: Any) -> None:
|
def create_human_scroll_plan(
    settings: ScrollSettings,
    random_module: Any = random,
) -> HumanScrollPlan:
    """Draw one randomized scroll step from *settings*.

    Args:
        settings: Ranges and probability used for the draw.
        random_module: Source of randomness providing ``randint``, ``uniform``
            and ``random``; pass a seeded ``random.Random`` for reproducible
            plans.

    Returns:
        A ``HumanScrollPlan``.  ``reverse_distance`` and ``reverse_wait`` stay
        at zero unless the reverse-scroll draw succeeds.
    """
    # The order of draws is kept stable so seeded generators reproduce plans.
    down_distance = random_module.randint(settings.min_scroll, settings.max_scroll)
    down_wait = random_module.uniform(settings.min_wait, settings.max_wait)
    settle_wait = random_module.uniform(settings.min_wait, settings.max_wait)

    reverse_distance = 0
    reverse_wait = 0.0
    if random_module.random() < settings.reverse_scroll_probability:
        reverse_distance = random_module.randint(
            settings.min_reverse_scroll,
            settings.max_reverse_scroll,
        )
        # The reverse pause is 1-3 s, capped by max_wait.  The lower bound is
        # capped as well so that a max_wait below one second is respected
        # instead of being exceeded.
        longest_pause = min(3.0, settings.max_wait)
        shortest_pause = min(1.0, longest_pause)
        reverse_wait = random_module.uniform(shortest_pause, longest_pause)

    return HumanScrollPlan(
        down_distance=down_distance,
        down_wait=down_wait,
        reverse_distance=reverse_distance,
        reverse_wait=reverse_wait,
        settle_wait=settle_wait,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def run_human_scroll_sequence(page: Any, plan: HumanScrollPlan) -> None:
    """Execute *plan* on *page*: scroll down, optionally back up, then pause.

    Args:
        page: Object exposing ``run_js(script)``.
        plan: Distances (pixels) and pauses (seconds) to apply.

    When ``plan.reverse_distance`` is positive the page is scrolled up by that
    amount and, after ``plan.reverse_wait``, scrolled down by twice that
    amount, so the step ends below where the reverse scroll started.
    """
    page.run_js(f"window.scrollBy(0, {plan.down_distance});")
    print(f"[INFO] 向下滚动 {plan.down_distance}px,停留 {plan.down_wait:.1f}s")
    time.sleep(plan.down_wait)

    if plan.reverse_distance > 0:
        page.run_js(f"window.scrollBy(0, -{plan.reverse_distance});")
        print(f"[INFO] 小幅回滚 {plan.reverse_distance}px,停留 {plan.reverse_wait:.1f}s")
        time.sleep(plan.reverse_wait)
        # Undo the reverse scroll and advance by the same distance again.
        forward_distance = plan.reverse_distance * 2
        page.run_js(f"window.scrollBy(0, {forward_distance});")

    if plan.settle_wait > 0:
        print(f"[INFO] 继续停留 {plan.settle_wait:.1f}s")
        time.sleep(plan.settle_wait)
|
||||||
|
|
||||||
|
|
||||||
|
def human_like_scroll(page: Any, settings: ScrollSettings | None = None) -> None:
    """Perform one randomized, human-like scroll step on *page*.

    Args:
        page: Object exposing ``run_js(script)``.
        settings: Scroll configuration; default ``ScrollSettings()`` values are
            used when omitted.
    """
    scroll_settings = ScrollSettings() if settings is None else settings
    plan = create_human_scroll_plan(scroll_settings)
    run_human_scroll_sequence(page, plan)
|
||||||
|
|
||||||
|
|
||||||
def download_video(
|
def download_video(
|
||||||
@ -435,6 +523,7 @@ def collect_recommendations(
|
|||||||
timeout: int,
|
timeout: int,
|
||||||
output_dir: Path,
|
output_dir: Path,
|
||||||
browser_port: int | None,
|
browser_port: int | None,
|
||||||
|
scroll_settings: ScrollSettings | None = None,
|
||||||
) -> int:
|
) -> int:
|
||||||
requests_module, chromium_page_cls, chromium_options_cls = import_runtime_dependencies()
|
requests_module, chromium_page_cls, chromium_options_cls = import_runtime_dependencies()
|
||||||
headers = build_headers("https://www.douyin.com/")
|
headers = build_headers("https://www.douyin.com/")
|
||||||
@ -450,16 +539,22 @@ def collect_recommendations(
|
|||||||
downloaded = 0
|
downloaded = 0
|
||||||
seen_ids: set[str] = set()
|
seen_ids: set[str] = set()
|
||||||
consecutive_empty = 0
|
consecutive_empty = 0
|
||||||
max_consecutive_empty = 3
|
max_consecutive_empty = 6
|
||||||
|
settings = scroll_settings or ScrollSettings()
|
||||||
|
started_at = time.monotonic()
|
||||||
|
|
||||||
while downloaded < max_videos:
|
while downloaded < max_videos:
|
||||||
|
if settings.max_runtime > 0 and time.monotonic() - started_at >= settings.max_runtime:
|
||||||
|
print("[INFO] 已达到最大运行时间,结束抓取。")
|
||||||
|
break
|
||||||
|
|
||||||
packet = wait_for_aweme_packet(page, timeout=timeout)
|
packet = wait_for_aweme_packet(page, timeout=timeout)
|
||||||
if packet is None:
|
if packet is None:
|
||||||
consecutive_empty += 1
|
consecutive_empty += 1
|
||||||
if consecutive_empty >= max_consecutive_empty:
|
if consecutive_empty >= max_consecutive_empty:
|
||||||
print("[INFO] 连续多次未获取到新数据,结束抓取。")
|
print("[INFO] 连续多次未获取到新数据,结束抓取。")
|
||||||
break
|
break
|
||||||
human_like_scroll(page)
|
human_like_scroll(page, settings=settings)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -470,14 +565,14 @@ def collect_recommendations(
|
|||||||
consecutive_empty += 1
|
consecutive_empty += 1
|
||||||
if consecutive_empty >= max_consecutive_empty:
|
if consecutive_empty >= max_consecutive_empty:
|
||||||
break
|
break
|
||||||
human_like_scroll(page)
|
human_like_scroll(page, settings=settings)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if not items:
|
if not items:
|
||||||
consecutive_empty += 1
|
consecutive_empty += 1
|
||||||
if consecutive_empty >= max_consecutive_empty:
|
if consecutive_empty >= max_consecutive_empty:
|
||||||
break
|
break
|
||||||
human_like_scroll(page)
|
human_like_scroll(page, settings=settings)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
consecutive_empty = 0
|
consecutive_empty = 0
|
||||||
@ -518,7 +613,7 @@ def collect_recommendations(
|
|||||||
if consecutive_empty >= max_consecutive_empty:
|
if consecutive_empty >= max_consecutive_empty:
|
||||||
break
|
break
|
||||||
|
|
||||||
human_like_scroll(page)
|
human_like_scroll(page, settings=settings)
|
||||||
|
|
||||||
return downloaded
|
return downloaded
|
||||||
|
|
||||||
@ -596,6 +691,36 @@ def build_parser() -> argparse.ArgumentParser:
|
|||||||
default=50,
|
default=50,
|
||||||
help="推荐流最大抓取数量,默认 50",
|
help="推荐流最大抓取数量,默认 50",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--scroll-mode",
|
||||||
|
choices=["human"],
|
||||||
|
default="human",
|
||||||
|
help="推荐流滚动模式,默认 human",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--min-wait",
|
||||||
|
type=float,
|
||||||
|
default=2.0,
|
||||||
|
help="推荐流每次滚动后的最短等待秒数,默认 2",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--max-wait",
|
||||||
|
type=float,
|
||||||
|
default=8.0,
|
||||||
|
help="推荐流每次滚动后的最长等待秒数,默认 8",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--reverse-scroll-probability",
|
||||||
|
type=float,
|
||||||
|
default=0.2,
|
||||||
|
help="推荐流小幅回滚概率,取值 0 到 1,默认 0.2",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--max-runtime",
|
||||||
|
type=float,
|
||||||
|
default=600.0,
|
||||||
|
help="推荐流最大运行秒数,默认 600;设置为 0 表示不限制",
|
||||||
|
)
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
@ -611,6 +736,22 @@ def main(argv: list[str] | None = None) -> int:
|
|||||||
parser.error("--browser-port 必须大于 0")
|
parser.error("--browser-port 必须大于 0")
|
||||||
if args.max_videos <= 0:
|
if args.max_videos <= 0:
|
||||||
parser.error("--max-videos 必须大于 0")
|
parser.error("--max-videos 必须大于 0")
|
||||||
|
if args.min_wait < 0:
|
||||||
|
parser.error("--min-wait 不能小于 0")
|
||||||
|
if args.max_wait < args.min_wait:
|
||||||
|
parser.error("--max-wait 必须大于或等于 --min-wait")
|
||||||
|
if not 0 <= args.reverse_scroll_probability <= 1:
|
||||||
|
parser.error("--reverse-scroll-probability 必须在 0 到 1 之间")
|
||||||
|
if args.max_runtime < 0:
|
||||||
|
parser.error("--max-runtime 不能小于 0")
|
||||||
|
|
||||||
|
scroll_settings = ScrollSettings(
|
||||||
|
mode=args.scroll_mode,
|
||||||
|
min_wait=args.min_wait,
|
||||||
|
max_wait=args.max_wait,
|
||||||
|
reverse_scroll_probability=args.reverse_scroll_probability,
|
||||||
|
max_runtime=args.max_runtime,
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
target = resolve_cli_target(args.target, browser_port=args.browser_port)
|
target = resolve_cli_target(args.target, browser_port=args.browser_port)
|
||||||
@ -629,6 +770,7 @@ def main(argv: list[str] | None = None) -> int:
|
|||||||
timeout=args.timeout,
|
timeout=args.timeout,
|
||||||
output_dir=Path(args.output_dir),
|
output_dir=Path(args.output_dir),
|
||||||
browser_port=args.browser_port,
|
browser_port=args.browser_port,
|
||||||
|
scroll_settings=scroll_settings,
|
||||||
)
|
)
|
||||||
elif target.kind == "single-video":
|
elif target.kind == "single-video":
|
||||||
total = collect_single_video(
|
total = collect_single_video(
|
||||||
|
|||||||
103
test_douyin.py
103
test_douyin.py
@ -60,6 +60,14 @@ class FakeRuntimePage:
|
|||||||
raise AssertionError(f"unexpected scroll script: {script}")
|
raise AssertionError(f"unexpected scroll script: {script}")
|
||||||
|
|
||||||
|
|
||||||
|
class FakeScrollPage:
    """Test double for a browser page that records every script it is asked to run."""

    def __init__(self) -> None:
        # Scripts passed to run_js, in call order.
        self.scripts: list[str] = []

    def run_js(self, script: str) -> None:
        self.scripts.append(script)
|
||||||
|
|
||||||
|
|
||||||
class DouyinModuleTests(unittest.TestCase):
|
class DouyinModuleTests(unittest.TestCase):
|
||||||
def test_module_can_import_without_optional_runtime_dependencies(self) -> None:
|
def test_module_can_import_without_optional_runtime_dependencies(self) -> None:
|
||||||
module = importlib.import_module("Douyin")
|
module = importlib.import_module("Douyin")
|
||||||
@ -98,6 +106,16 @@ class DouyinModuleTests(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
self.assertEqual(output_path.as_posix(), "video/[测试博主]测试标题-123456.mp4")
|
self.assertEqual(output_path.as_posix(), "video/[测试博主]测试标题-123456.mp4")
|
||||||
|
|
||||||
|
def test_build_output_path_limits_long_filename(self) -> None:
    """An over-long title and author must still give a name of at most 240 bytes."""
    module = importlib.import_module("Douyin")
    output_path = module.build_output_path(
        title="超长标题" * 100,
        video_id="7619989983668240802",
        author_name="超长博主名" * 20,
    )
    self.assertLessEqual(len(output_path.name.encode("utf-8")), 240)
    # The video id and extension must survive truncation intact.
    self.assertTrue(output_path.name.endswith("-7619989983668240802.mp4"))
|
||||||
|
|
||||||
def test_extract_aweme_payload_uses_dict_body(self) -> None:
|
def test_extract_aweme_payload_uses_dict_body(self) -> None:
|
||||||
module = importlib.import_module("Douyin")
|
module = importlib.import_module("Douyin")
|
||||||
response = FakeResponse({"aweme_list": []}, "")
|
response = FakeResponse({"aweme_list": []}, "")
|
||||||
@ -111,11 +129,71 @@ class DouyinModuleTests(unittest.TestCase):
|
|||||||
{"aweme_list": [{"aweme_id": "1"}]},
|
{"aweme_list": [{"aweme_id": "1"}]},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_wait_for_aweme_packet_treats_false_listener_result_as_missing(self) -> None:
    """A listener that returns False must be reported as "no packet" (None)."""
    module = importlib.import_module("Douyin")
    page = mock.MagicMock()
    page.listen.wait.return_value = False
    self.assertIsNone(module.wait_for_aweme_packet(page, timeout=10))
|
||||||
|
|
||||||
def test_build_browser_address_from_port(self) -> None:
    """A port maps to a 127.0.0.1 address; None stays None."""
    module = importlib.import_module("Douyin")
    self.assertEqual(module.build_browser_address(9223), "127.0.0.1:9223")
    self.assertIsNone(module.build_browser_address(None))
|
||||||
|
|
||||||
|
def test_default_scroll_settings_uses_human_mode(self) -> None:
    """ScrollSettings() must expose the documented defaults."""
    module = importlib.import_module("Douyin")
    settings = module.ScrollSettings()
    self.assertEqual(settings.mode, "human")
    self.assertEqual(settings.min_wait, 2.0)
    self.assertEqual(settings.max_wait, 8.0)
    self.assertEqual(settings.reverse_scroll_probability, 0.2)
|
||||||
|
|
||||||
|
def test_create_human_scroll_plan_uses_configured_ranges(self) -> None:
    """Every drawn value must fall inside the configured bounds."""
    module = importlib.import_module("Douyin")
    settings = module.ScrollSettings(
        min_wait=2.0,
        max_wait=4.0,
        min_scroll=300,
        max_scroll=900,
        reverse_scroll_probability=0.0,
    )
    plan = module.create_human_scroll_plan(settings, random_module=module.random.Random(7))
    self.assertGreaterEqual(plan.down_distance, 300)
    self.assertLessEqual(plan.down_distance, 900)
    self.assertGreaterEqual(plan.down_wait, 2.0)
    self.assertLessEqual(plan.down_wait, 4.0)
    # The settle pause is drawn from the same wait range as the down pause.
    self.assertGreaterEqual(plan.settle_wait, 2.0)
    self.assertLessEqual(plan.settle_wait, 4.0)
    # Probability 0 means the reverse scroll is never planned.
    self.assertEqual(plan.reverse_distance, 0)
    self.assertEqual(plan.reverse_wait, 0.0)
|
||||||
|
|
||||||
|
def test_create_human_scroll_plan_can_include_reverse_scroll(self) -> None:
    """With probability 1 the plan always carries a reverse scroll in range."""
    module = importlib.import_module("Douyin")
    settings = module.ScrollSettings(reverse_scroll_probability=1.0)
    plan = module.create_human_scroll_plan(settings, random_module=module.random.Random(3))
    self.assertGreaterEqual(plan.reverse_distance, 80)
    self.assertLessEqual(plan.reverse_distance, 250)
    self.assertGreater(plan.reverse_wait, 0)
|
||||||
|
|
||||||
|
def test_run_human_scroll_sequence_scrolls_down_and_optionally_back_up(self) -> None:
    """A plan with a reverse scroll issues down, up, then double-forward scrolls."""
    module = importlib.import_module("Douyin")
    page = FakeScrollPage()
    plan = module.HumanScrollPlan(
        down_distance=500,
        down_wait=2.5,
        reverse_distance=120,
        reverse_wait=1.0,
        settle_wait=3.0,
    )
    with mock.patch.object(module.time, "sleep") as mocked_sleep:
        module.run_human_scroll_sequence(page, plan)
    self.assertEqual(
        page.scripts,
        [
            "window.scrollBy(0, 500);",
            "window.scrollBy(0, -120);",
            "window.scrollBy(0, 240);",
        ],
    )
    # Compare the full call list so an unexpected extra sleep is caught too.
    self.assertEqual(
        mocked_sleep.call_args_list,
        [mock.call(2.5), mock.call(1.0), mock.call(3.0)],
    )
|
||||||
|
|
||||||
def test_ensure_browser_debug_port_ready_accepts_open_port(self) -> None:
|
def test_ensure_browser_debug_port_ready_accepts_open_port(self) -> None:
|
||||||
module = importlib.import_module("Douyin")
|
module = importlib.import_module("Douyin")
|
||||||
connection = mock.MagicMock()
|
connection = mock.MagicMock()
|
||||||
@ -367,7 +445,7 @@ class DouyinModuleTests(unittest.TestCase):
|
|||||||
with mock.patch.object(module, "import_runtime_dependencies", return_value=(object(), object(), object())):
|
with mock.patch.object(module, "import_runtime_dependencies", return_value=(object(), object(), object())):
|
||||||
with mock.patch.object(module, "create_page", return_value=page):
|
with mock.patch.object(module, "create_page", return_value=page):
|
||||||
with mock.patch.object(module, "download_video") as mocked_download:
|
with mock.patch.object(module, "download_video") as mocked_download:
|
||||||
with mock.patch.object(module, "scroll_to_next_page"):
|
with mock.patch.object(module, "human_like_scroll"):
|
||||||
downloaded = module.collect_recommendations(
|
downloaded = module.collect_recommendations(
|
||||||
max_videos=50,
|
max_videos=50,
|
||||||
timeout=10,
|
timeout=10,
|
||||||
@ -455,6 +533,28 @@ class DouyinModuleTests(unittest.TestCase):
|
|||||||
args = module.build_parser().parse_args(["--max-videos", "30"])
|
args = module.build_parser().parse_args(["--max-videos", "30"])
|
||||||
self.assertEqual(args.max_videos, 30)
|
self.assertEqual(args.max_videos, 30)
|
||||||
|
|
||||||
|
def test_build_parser_has_human_scroll_arguments(self) -> None:
    """The parser must accept and convert all scroll-related options."""
    module = importlib.import_module("Douyin")
    args = module.build_parser().parse_args(
        [
            "--scroll-mode",
            "human",
            "--min-wait",
            "3",
            "--max-wait",
            "9",
            "--reverse-scroll-probability",
            "0.4",
            "--max-runtime",
            "600",
        ]
    )
    self.assertEqual(args.scroll_mode, "human")
    self.assertEqual(args.min_wait, 3)
    self.assertEqual(args.max_wait, 9)
    self.assertEqual(args.reverse_scroll_probability, 0.4)
    self.assertEqual(args.max_runtime, 600)
|
||||||
|
|
||||||
def test_build_parser_defaults_to_zero_argument_current_page_flow(self) -> None:
|
def test_build_parser_defaults_to_zero_argument_current_page_flow(self) -> None:
|
||||||
module = importlib.import_module("Douyin")
|
module = importlib.import_module("Douyin")
|
||||||
args = module.build_parser().parse_args([])
|
args = module.build_parser().parse_args([])
|
||||||
@ -488,6 +588,7 @@ class DouyinModuleTests(unittest.TestCase):
|
|||||||
timeout=10,
|
timeout=10,
|
||||||
output_dir=module.Path("video"),
|
output_dir=module.Path("video"),
|
||||||
browser_port=9223,
|
browser_port=9223,
|
||||||
|
scroll_settings=module.ScrollSettings(),
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_main_without_target_dispatches_current_page_creator_flow(self) -> None:
|
def test_main_without_target_dispatches_current_page_creator_flow(self) -> None:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user