douyin-crawler-poc/login_douyin.py

126 lines
3.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
import argparse
import socket
import subprocess
import sys
import time
from pathlib import Path
# Page opened in the launched browser when --user-url is not given.
DEFAULT_RECOMMENDATION_URL = "https://www.douyin.com/"
# Default for --chrome-path: the Chrome executable inside the macOS app bundle.
DEFAULT_CHROME_PATH = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
# Default for --browser-port: passed to Chrome as --remote-debugging-port and
# probed on 127.0.0.1 after launch.
DEFAULT_BROWSER_PORT = 9223
# Default for --profile-dir: a relative path; main() resolves it to an absolute
# path and creates it before handing it to Chrome as --user-data-dir.
DEFAULT_PROFILE_DIR = Path(".douyin-chrome-profile")
def derive_chrome_app_path(chrome_path: str) -> str:
    """Return the ``.app`` bundle that contains the given Chrome executable.

    Everything from the first ``.app/`` onwards is dropped and the ``.app``
    suffix is kept, e.g. ``/Applications/X.app/Contents/MacOS/X`` becomes
    ``/Applications/X.app``. A path without ``.app/`` is returned unchanged.
    """
    bundle_root, separator, _inside_bundle = chrome_path.partition(".app/")
    if not separator:
        # No bundle component in the path: nothing to strip.
        return chrome_path
    return bundle_root + ".app"
def build_login_command(
    chrome_path: str,
    profile_dir: Path,
    browser_port: int,
    user_url: str,
) -> list[str]:
    """Build the argv list that opens Chrome through the ``open`` command.

    Args:
        chrome_path: Path to the Chrome executable; it is reduced to its
            ``.app`` bundle path before being handed to ``open``.
        profile_dir: Directory passed to Chrome as ``--user-data-dir``.
        browser_port: Port passed to Chrome as ``--remote-debugging-port``.
        user_url: URL appended as the last argument.

    Returns:
        The command as a list of strings, suitable for ``subprocess.Popen``.
    """
    # Everything after "--args" is forwarded to Chrome itself.
    chrome_flags = [
        f"--user-data-dir={profile_dir}",
        f"--remote-debugging-port={browser_port}",
    ]
    bundle_path = derive_chrome_app_path(chrome_path)
    return ["open", "-na", bundle_path, "--args", *chrome_flags, user_url]
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the login launcher.

    Registers ``--chrome-path``, ``--profile-dir``, ``--browser-port`` (parsed
    as ``int``) and ``--user-url``, each defaulting to the corresponding
    module-level ``DEFAULT_*`` constant.
    """
    cli = argparse.ArgumentParser(description="启动可见 Chrome供抖音手动登录后附着抓取")
    # (flag, add_argument keyword arguments), registered in this order.
    option_table = (
        (
            "--chrome-path",
            {"default": DEFAULT_CHROME_PATH, "help": "Chrome 可执行文件路径"},
        ),
        (
            "--profile-dir",
            {
                "default": str(DEFAULT_PROFILE_DIR),
                "help": "Chrome 用户数据目录,默认复用项目内固定目录",
            },
        ),
        (
            "--browser-port",
            {
                "type": int,
                "default": DEFAULT_BROWSER_PORT,
                "help": "Chrome 调试端口,默认 9223",
            },
        ),
        (
            "--user-url",
            {
                "default": DEFAULT_RECOMMENDATION_URL,
                "help": "启动后打开的抖音页面 URL默认推荐流首页",
            },
        ),
    )
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    return cli
def launch_browser(command: list[str]) -> subprocess.Popen[str]:
    """Start ``command`` as a child process and return its handle.

    The call does not wait for the child to finish. ``OSError`` from
    ``subprocess.Popen`` (for example, executable not found) propagates to
    the caller.
    """
    child = subprocess.Popen(command)
    return child
def wait_for_browser_debug_port(
    browser_port: int,
    timeout_seconds: float = 15.0,
    interval_seconds: float = 0.25,
) -> None:
    """Block until a TCP connection to ``127.0.0.1:browser_port`` succeeds.

    The port is probed at least once, even when ``timeout_seconds`` is zero
    or negative, and then again every ``interval_seconds`` until the deadline.
    The pause between probes never extends past the deadline, and one last
    probe is made when the deadline is reached.

    Args:
        browser_port: Local TCP port to probe.
        timeout_seconds: Total time budget for the port to become reachable.
        interval_seconds: Pause between failed probes.

    Raises:
        RuntimeError: If no probe succeeded within the time budget.
    """
    deadline = time.monotonic() + timeout_seconds
    while True:
        try:
            # The connection is only a reachability probe; close it at once.
            with socket.create_connection(("127.0.0.1", browser_port), timeout=1):
                return
        except OSError:
            pass
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Never sleep past the deadline, so the final probe happens on time.
        time.sleep(min(interval_seconds, remaining))
    raise RuntimeError(
        f"Chrome 已启动命令,但调试端口 {browser_port} 在限定时间内未就绪。"
    )
def main(argv: list[str] | None = None) -> int:
    """Launch Chrome with remote debugging enabled and wait until it is reachable.

    Steps: parse the arguments, validate the port and the Chrome path, create
    the profile directory, start Chrome, wait for the debugging port, and
    print the follow-up command to run after logging in.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        ``0`` on success; ``1`` if the Chrome path does not exist, the launch
        command could not be started, or the debugging port did not become
        reachable in time. An invalid ``--browser-port`` is reported through
        ``parser.error``, which exits the process instead of returning.
    """
    parser = build_parser()
    args = parser.parse_args(argv)
    if args.browser_port <= 0:
        parser.error("--browser-port 必须大于 0")
    if args.browser_port > 65535:
        # TCP ports are 16-bit. Reject the value here, before Chrome is
        # launched with a debugging port that the later probe cannot reach.
        parser.error("--browser-port 必须小于等于 65535")

    chrome_path = Path(args.chrome_path)
    if not chrome_path.exists():
        print(f"[ERROR] Chrome 可执行文件不存在: {chrome_path}")
        return 1

    # Chrome receives an absolute profile path, independent of its own cwd.
    profile_dir = Path(args.profile_dir).resolve()
    profile_dir.mkdir(parents=True, exist_ok=True)

    command = build_login_command(
        chrome_path=str(chrome_path),
        profile_dir=profile_dir,
        browser_port=args.browser_port,
        user_url=args.user_url,
    )
    try:
        launch_browser(command)
    except OSError as exc:
        print(f"[ERROR] 启动 Chrome 失败: {exc}")
        return 1

    try:
        wait_for_browser_debug_port(args.browser_port)
    except RuntimeError as exc:
        print(f"[ERROR] {exc}")
        return 1

    print("[INFO] Chrome 已启动。请在打开的浏览器中完成抖音登录和验证码。")
    next_command = "./.venv/bin/python Douyin.py"
    if args.browser_port != DEFAULT_BROWSER_PORT:
        # Only mention the port when it differs from the default.
        next_command = f"{next_command} --browser-port {args.browser_port}"
    print(f"[INFO] 登录完成后执行: {next_command}")
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())