diff --git a/Douyin.py b/Douyin.py index 6440803..da6a6ba 100644 --- a/Douyin.py +++ b/Douyin.py @@ -140,6 +140,40 @@ def choose_video_url(url_list: list[str]) -> str: raise ValueError("url_list 为空,无法选择视频地址。") +def extract_url_list_from_play_addr(play_addr: Any) -> list[str]: + if not isinstance(play_addr, dict): + return [] + + url_list = play_addr.get("url_list") or [] + if not isinstance(url_list, list): + return [] + + return [str(url) for url in url_list if str(url).strip()] + + +def extract_video_url_list(video: Any) -> list[str]: + if not isinstance(video, dict): + return [] + + for address_key in ("play_addr", "play_addr_h264", "play_addr_lowbr"): + url_list = extract_url_list_from_play_addr(video.get(address_key)) + if url_list: + return url_list + + bit_rate_list = video.get("bit_rate") or [] + if not isinstance(bit_rate_list, list): + return [] + + for bit_rate in bit_rate_list: + if not isinstance(bit_rate, dict): + continue + url_list = extract_url_list_from_play_addr(bit_rate.get("play_addr")) + if url_list: + return url_list + + return [] + + def build_output_path( title: str, video_id: str, @@ -201,8 +235,7 @@ def parse_aweme_items(body: Any) -> list[dict[str, str]]: continue video = aweme.get("video") or {} - play_addr = video.get("play_addr") or {} - url_list = play_addr.get("url_list") or [] + url_list = extract_video_url_list(video) if not url_list: continue @@ -220,7 +253,7 @@ def parse_aweme_items(body: Any) -> list[dict[str, str]]: { "title": title, "video_id": video_id, - "video_url": choose_video_url([str(url) for url in url_list]), + "video_url": choose_video_url(url_list), "author_name": author_name, "author_id": author_id, } diff --git a/login_douyin.py b/login_douyin.py index 7664739..25b9ccc 100644 --- a/login_douyin.py +++ b/login_douyin.py @@ -8,6 +8,7 @@ import time from pathlib import Path DEFAULT_RECOMMENDATION_URL = "https://www.douyin.com/" +DEFAULT_USER_URL = DEFAULT_RECOMMENDATION_URL DEFAULT_CHROME_PATH = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" DEFAULT_BROWSER_PORT = 9223 diff --git a/test_douyin.py b/test_douyin.py index c4fee10..7841000 100644 --- a/test_douyin.py +++ b/test_douyin.py @@ -294,6 +294,49 @@ class DouyinModuleTests(unittest.TestCase): self.assertEqual(items[0]["author_name"], "测试博主") self.assertEqual(items[0]["author_id"], "123456789") + def test_parse_aweme_items_uses_play_addr_h264_when_play_addr_is_missing(self) -> None: + module = importlib.import_module("Douyin") + payload = { + "aweme_list": [ + { + "aweme_id": "7619989983668240802", + "desc": "推荐视频", + "video": { + "play_addr_h264": { + "url_list": ["https://v26-web.douyinvod.com/example/h264.mp4"] + } + }, + } + ] + } + items = module.parse_aweme_items(payload) + self.assertEqual(len(items), 1) + self.assertEqual(items[0]["video_url"], "https://v26-web.douyinvod.com/example/h264.mp4") + + def test_parse_aweme_items_uses_bit_rate_play_addr_when_top_level_addresses_are_missing(self) -> None: + module = importlib.import_module("Douyin") + payload = { + "aweme_list": [ + { + "aweme_id": "7619989983668240802", + "desc": "推荐视频", + "video": { + "bit_rate": [ + { + "format": "mp4", + "play_addr": { + "url_list": ["https://v11-weba.douyinvod.com/example/bitrate.mp4"] + }, + } + ] + }, + } + ] + } + items = module.parse_aweme_items(payload) + self.assertEqual(len(items), 1) + self.assertEqual(items[0]["video_url"], "https://v11-weba.douyinvod.com/example/bitrate.mp4") + def test_build_video_page_url_uses_aweme_id(self) -> None: module = importlib.import_module("Douyin") self.assertEqual( diff --git a/test_learning_examples.py b/test_learning_examples.py index f134834..3fa4b93 100644 --- a/test_learning_examples.py +++ b/test_learning_examples.py @@ -299,6 +299,8 @@ class PlaywrightLearningHelperTests(unittest.TestCase): "title": "Playwright 示例", "video_id": "7619989983668240802", "video_url": "https://v26-web.douyinvod.com/example/single.mp4", + "author_name": "unknown", + "author_id": "unknown", }, )