From f7374d208840dd6e2dc8ee96e87762054e5ade3b Mon Sep 17 00:00:00 2001
From: wangshaoqing
Date: Wed, 6 May 2026 17:15:19 +0800
Subject: [PATCH] feat: add recommendation URL recognition

---
Review notes (free text after the separator; not part of the commit):

* Douyin.py gains RECOMMENDATION_URL_PATTERN and is_recommendation_url(value).
  The helper strips surrounding whitespace from `value` and returns True when
  the pattern matches, mirroring the existing is_creator_url helper.
* The pattern accepts an http or https scheme, the host www.douyin.com, an
  optional trailing slash and an optional query string. Any URL with a path
  segment (for example /user/... or /video/...) does not match.
* test_douyin.py adds one test: the bare host, the host with a trailing
  slash and the host with a query string are accepted; a /user/ URL and a
  /video/ URL are rejected.
* NOTE(review): a URL that carries only a fragment, such as
  https://www.douyin.com/#x, is not matched because the pattern allows only
  a "?" suffix. Confirm whether that is intended.
* NOTE(review): the VIDEO_URL_PATTERN context line below reads "(?P\d+)",
  which is not a valid Python named group. The group name was presumably
  lost when this patch text was extracted; verify against Douyin.py.
* NOTE(review): this patch was recovered from text whose line breaks had
  been collapsed. The indentation of hunk lines below is inferred; verify
  against the target files before applying.

 Douyin.py      | 5 +++++
 test_douyin.py | 8 ++++++++
 2 files changed, 13 insertions(+)

diff --git a/Douyin.py b/Douyin.py
index 13187fd..1a7cba9 100644
--- a/Douyin.py
+++ b/Douyin.py
@@ -29,6 +29,7 @@ DEFAULT_BROWSER_PORT = 9223
 LISTEN_TARGET = "web/aweme/post/"
 SINGLE_VIDEO_LISTEN_TARGET = "web/aweme/detail/"
 INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')
+RECOMMENDATION_URL_PATTERN = re.compile(r"^https?://www\.douyin\.com/?(?:\?.*)?$")
 CREATOR_URL_PATTERN = re.compile(r"^https?://www\.douyin\.com/user/[^/?#]+(?:\?.*)?$")
 VIDEO_URL_PATTERN = re.compile(r"^https?://www\.douyin\.com/video/(?P\d+)(?:[/?#].*)?$")
 AWEME_ID_PATTERN = re.compile(r"^\d{5,}$")
@@ -47,6 +48,10 @@ def sanitize_filename(value: str, fallback: str = "untitled") -> str:
     return cleaned or fallback
 
 
+def is_recommendation_url(value: str) -> bool:
+    return bool(RECOMMENDATION_URL_PATTERN.match(value.strip()))
+
+
 def is_creator_url(value: str) -> bool:
     return bool(CREATOR_URL_PATTERN.match(value.strip()))
 
diff --git a/test_douyin.py b/test_douyin.py
index b65177d..17ae59a 100644
--- a/test_douyin.py
+++ b/test_douyin.py
@@ -115,6 +115,14 @@ class DouyinModuleTests(unittest.TestCase):
         with self.assertRaisesRegex(RuntimeError, "login_douyin.py"):
             module.ensure_browser_debug_port_ready(9223)
 
+    def test_is_recommendation_url_accepts_douyin_homepage(self) -> None:
+        module = importlib.import_module("Douyin")
+        self.assertTrue(module.is_recommendation_url("https://www.douyin.com/"))
+        self.assertTrue(module.is_recommendation_url("https://www.douyin.com"))
+        self.assertTrue(module.is_recommendation_url("https://www.douyin.com/?from=web"))
+        self.assertFalse(module.is_recommendation_url("https://www.douyin.com/user/xxx"))
+        self.assertFalse(module.is_recommendation_url("https://www.douyin.com/video/123"))
+
     def 
test_is_creator_url_accepts_supported_douyin_creator_url(self) -> None: module = importlib.import_module("Douyin") self.assertTrue(