"""Tests for the learning scripts of the douyin-crawler-poc project.

The scripts under ``learning/`` have file names that start with digits, so
they cannot be imported with a normal ``import`` statement. The tests load
them by file path instead, and use small fake objects in place of real
browser pages, listeners and responses.

The project root defaults to the original developer's checkout location and
can be overridden with the ``DOUYIN_CRAWLER_POC_ROOT`` environment variable.
"""

import importlib.util
import os
import subprocess
import sys
import unittest
from pathlib import Path
from unittest import mock

# Name of the environment variable that overrides the project root.
ROOT_ENV_VAR = "DOUYIN_CRAWLER_POC_ROOT"

# Historical default, kept so existing setups behave exactly as before.
_DEFAULT_ROOT = "/Users/wangshaoqing/Desktop/MiaoSi/Study/douyin-crawler-poc"


def _resolve_root() -> Path:
    """Return the project root, honouring the environment override if set.

    An unset or empty override falls back to the historical default path,
    which is returned unmodified.
    """
    override = os.environ.get(ROOT_ENV_VAR)
    if override:
        # Make the override absolute because it is later compared against
        # ``sys.path`` entries and used as a subprocess working directory.
        return Path(override).expanduser().resolve()
    return Path(_DEFAULT_ROOT)


ROOT = _resolve_root()

# Script paths relative to ROOT, shared by several test cases.
DP_ATTACH_SCRIPT = "learning/drissionpage_basics/02_attach_browser.py"
DP_LISTEN_SCRIPT = "learning/drissionpage_basics/03_listen_api.py"
DP_PARSE_SCRIPT = "learning/drissionpage_basics/04_parse_aweme.py"
DP_DOWNLOAD_SCRIPT = "learning/drissionpage_basics/05_download_video.py"
PW_CONTEXT_SCRIPT = "learning/playwright_basics/02_persistent_context.py"
PW_LISTEN_SCRIPT = "learning/playwright_basics/04_listen_response.py"
PW_DOWNLOAD_SCRIPT = "learning/playwright_basics/05_download_video.py"


def load_module(module_name: str, relative_path: str):
    """Load and execute the Python file at ``ROOT / relative_path``.

    Args:
        module_name: Name given to the created module object.
        relative_path: Path of the source file, relative to ``ROOT``.

    Returns:
        The freshly executed module object.

    Raises:
        RuntimeError: If no import spec or loader can be built for the file.
    """
    file_path = ROOT / relative_path
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise RuntimeError(f"无法加载模块: {relative_path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module


class FakePacketResponse:
    """Stand-in for a captured packet's response; holds ``body`` and ``raw_body``."""

    def __init__(self, body):
        self.body = body
        self.raw_body = ""


class FakePacket:
    """Stand-in for a captured packet exposing a ``response`` attribute."""

    def __init__(self, body):
        self.response = FakePacketResponse(body)


class FakeListener:
    """Stand-in for a page listener that records targets and returns a fixed packet."""

    def __init__(self, packet):
        self.packet = packet
        self.targets = []

    def start(self, target):
        """Record the target that listening was started for."""
        self.targets.append(target)

    def wait(self, timeout):
        """Return the preconfigured packet immediately; ``timeout`` is ignored."""
        return self.packet


class FakePage:
    """Stand-in for a browser page with a ``listen`` attribute and a ``get`` method."""

    def __init__(self, url: str, packet):
        self.url = url
        self.listen = FakeListener(packet)
        self.visited_urls = []

    def get(self, url: str):
        """Record the visited URL and make it the current ``url``."""
        self.visited_urls.append(url)
        self.url = url


class LearningParseAwemeTests(unittest.TestCase):
    """Tests for the helpers in the DrissionPage ``04_parse_aweme.py`` script."""

    def test_choose_video_url_prefers_douyinvod_link(self) -> None:
        """``choose_video_url`` returns the douyinvod link even when it is not first."""
        module = load_module("learning04", DP_PARSE_SCRIPT)
        candidates = [
            "https://example.com/play/first",
            "https://v11-weba.douyinvod.com/example/first.mp4",
        ]

        self.assertEqual(
            module.choose_video_url(candidates),
            "https://v11-weba.douyinvod.com/example/first.mp4",
        )

    def test_parse_aweme_items_extracts_expected_fields(self) -> None:
        """``parse_aweme_items`` turns the sample payload into title/id/url dicts."""
        module = load_module("learning04", DP_PARSE_SCRIPT)
        expected_items = [
            {
                "title": "第一个示例视频",
                "video_id": "7500000000000000001",
                "video_url": "https://v11-weba.douyinvod.com/example/first.mp4",
            },
            {
                "title": "第二个示例视频",
                "video_id": "7500000000000000002",
                "video_url": "https://v26-web.douyinvod.com/example/second.mp4",
            },
        ]

        items = module.parse_aweme_items(module.SAMPLE_PAYLOAD)

        self.assertEqual(items, expected_items)


class LearningDownloadVideoTests(unittest.TestCase):
    """Tests for the helpers in the DrissionPage ``05_download_video.py`` script."""

    def test_build_output_path_uses_learning_output_directory(self) -> None:
        """``build_output_path`` places the file in the DrissionPage output directory."""
        module = load_module("learning05", DP_DOWNLOAD_SCRIPT)

        output_path = module.build_output_path("示例标题", "123456")

        self.assertEqual(
            output_path.as_posix(),
            "learning/drissionpage_basics/output/示例标题-123456.mp4",
        )

    def test_download_first_real_video_uses_first_item_from_packet(self) -> None:
        """The first item of the listened packet is the one handed to ``download_video``."""
        module = load_module("learning05", DP_DOWNLOAD_SCRIPT)
        profile_url = "https://www.douyin.com/user/MS4wLjABAAAAexample?from_tab_name=main"
        video_url = "https://v26-web.douyinvod.com/example/single.mp4"
        packet = FakePacket(
            {
                "aweme_list": [
                    {
                        "aweme_id": "7619989983668240802",
                        "desc": "单条练习视频",
                        "video": {"play_addr": {"url_list": [video_url]}},
                    }
                ]
            }
        )
        page = FakePage(profile_url, packet)
        requests_module = object()

        # Replace the real downloader so the test performs no network I/O.
        with mock.patch.object(module, "download_video") as mocked_download:
            output_path = module.download_first_real_video(
                page=page,
                requests_module=requests_module,
                output_dir=module.Path("learning/drissionpage_basics/output"),
                timeout=15,
            )

        self.assertEqual(page.listen.targets, [module.LISTEN_TARGET])
        self.assertEqual(page.visited_urls, [profile_url])
        self.assertEqual(
            output_path.as_posix(),
            "learning/drissionpage_basics/output/单条练习视频-7619989983668240802.mp4",
        )
        mocked_download.assert_called_once_with(
            requests_module=requests_module,
            headers=mock.ANY,
            video_url=video_url,
            output_path=output_path,
        )

    def test_download_first_real_video_raises_readable_error_when_listener_returns_false(self) -> None:
        """A listener result of ``False`` surfaces as a readable ``RuntimeError``."""
        module = load_module("learning05", DP_DOWNLOAD_SCRIPT)
        page = FakePage(
            "https://www.douyin.com/user/MS4wLjABAAAAexample?from_tab_name=main",
            False,
        )

        with self.assertRaisesRegex(RuntimeError, "没有监听到作品接口"):
            module.download_first_real_video(
                page=page,
                requests_module=object(),
                output_dir=module.Path("learning/drissionpage_basics/output"),
                timeout=15,
            )


class LearningScriptImportTests(unittest.TestCase):
    """Checks the learning directory layout and that scripts run outside the root."""

    def test_learning_directory_layout_has_both_tracks(self) -> None:
        """Every expected README, note and script exists under ``learning/``."""
        learning_dir = ROOT / "learning"
        network_dir = learning_dir / "network_basics"
        playwright_dir = learning_dir / "playwright_basics"
        expected_paths = [
            learning_dir / "README.md",
            learning_dir / "drissionpage_basics" / "README.md",
            playwright_dir / "README.md",
            network_dir / "README.md",
            network_dir / "01_open_devtools_check_xhr.md",
            network_dir / "02_find_aweme_list_api.md",
            network_dir / "03_read_headers_and_cookies.md",
            network_dir / "04_analyze_pagination.md",
            network_dir / "05_copy_as_curl_and_replay.md",
            playwright_dir / "01_open_page.py",
            playwright_dir / "02_persistent_context.py",
            playwright_dir / "03_wait_and_locate.py",
            playwright_dir / "04_listen_response.py",
            playwright_dir / "05_download_video.py",
        ]

        for path in expected_paths:
            self.assertTrue(path.exists(), msg=f"缺少学习文件: {path}")

    def assert_script_can_import_project_root(self, relative_path: str) -> None:
        """Run a script in a fresh interpreter with the project root off ``sys.path``.

        The child process removes ``''`` and the project root from
        ``sys.path`` before executing the script with ``runpy.run_path``
        under a run name other than ``__main__``. The assertion fails with
        the child's stderr when the exit code is non-zero.

        Args:
            relative_path: Path of the script, relative to ``ROOT``.
        """
        script_path = ROOT / relative_path
        command = (
            "import runpy, sys; "
            f"root = {str(ROOT)!r}; "
            "sys.path = [p for p in sys.path if p not in ('', root)]; "
            f"runpy.run_path({str(script_path)!r}, run_name='not_main')"
        )

        result = subprocess.run(
            [sys.executable, "-c", command],
            cwd=ROOT,
            capture_output=True,
            text=True,
        )

        self.assertEqual(result.returncode, 0, msg=result.stderr)

    def test_attach_browser_script_can_import_project_modules_when_run_from_learning(self) -> None:
        """The DrissionPage attach-browser script runs without the root on ``sys.path``."""
        self.assert_script_can_import_project_root(DP_ATTACH_SCRIPT)

    def test_listen_api_script_can_import_project_modules_when_run_from_learning(self) -> None:
        """The DrissionPage listen-API script runs without the root on ``sys.path``."""
        self.assert_script_can_import_project_root(DP_LISTEN_SCRIPT)

    def test_download_video_script_can_import_project_modules_when_run_from_learning(self) -> None:
        """The DrissionPage download script runs without the root on ``sys.path``."""
        self.assert_script_can_import_project_root(DP_DOWNLOAD_SCRIPT)

    def test_playwright_persistent_context_script_can_import_project_modules(self) -> None:
        """The Playwright persistent-context script runs without the root on ``sys.path``."""
        self.assert_script_can_import_project_root(PW_CONTEXT_SCRIPT)

    def test_playwright_listen_response_script_can_import_project_modules(self) -> None:
        """The Playwright listen-response script runs without the root on ``sys.path``."""
        self.assert_script_can_import_project_root(PW_LISTEN_SCRIPT)

    def test_playwright_download_video_script_can_import_project_modules(self) -> None:
        """The Playwright download script runs without the root on ``sys.path``."""
        self.assert_script_can_import_project_root(PW_DOWNLOAD_SCRIPT)


class FakePlaywrightPage:
    """Stand-in for a Playwright page; only carries a ``url``."""

    def __init__(self, url: str = "https://example.com/"):
        self.url = url


class FakePlaywrightContext:
    """Stand-in for a Playwright browser context that tracks pages it creates."""

    def __init__(self, pages):
        self.pages = pages
        self.created_pages = []

    def new_page(self):
        """Create a fake page, record it in both page lists and return it."""
        page = FakePlaywrightPage("https://created.example.com/")
        self.created_pages.append(page)
        self.pages.append(page)
        return page


class FakePlaywrightResponse:
    """Stand-in for a Playwright response with a configurable JSON payload."""

    def __init__(self, url: str, payload=None, status: int = 200):
        self.url = url
        self.status = status
        self._payload = payload

    def json(self):
        """Return the payload, or raise it when the payload is an exception."""
        if isinstance(self._payload, Exception):
            raise self._payload
        return self._payload


class PlaywrightLearningHelperTests(unittest.TestCase):
    """Tests for the helper functions in the Playwright learning scripts."""

    def test_persistent_context_reuses_first_existing_page(self) -> None:
        """``get_or_create_page`` returns the existing page and creates none."""
        module = load_module("playwright02", PW_CONTEXT_SCRIPT)
        page = FakePlaywrightPage("https://existing.example.com/")
        context = FakePlaywrightContext([page])

        self.assertIs(module.get_or_create_page(context), page)
        self.assertEqual(context.created_pages, [])

    def test_persistent_context_creates_page_when_context_is_empty(self) -> None:
        """``get_or_create_page`` creates exactly one page for an empty context."""
        module = load_module("playwright02", PW_CONTEXT_SCRIPT)
        context = FakePlaywrightContext([])

        page = module.get_or_create_page(context)

        self.assertEqual(page.url, "https://created.example.com/")
        self.assertEqual(len(context.created_pages), 1)

    def test_listen_response_target_matching_uses_url_substring(self) -> None:
        """``is_target_response_url`` accepts the aweme post URL and rejects another."""
        module = load_module("playwright04", PW_LISTEN_SCRIPT)

        self.assertTrue(
            module.is_target_response_url("https://www.douyin.com/aweme/v1/web/aweme/post/")
        )
        self.assertFalse(module.is_target_response_url("https://www.example.com/api"))

    def test_try_read_json_payload_returns_none_on_json_error(self) -> None:
        """``try_read_json_payload`` returns ``None`` when ``json()`` raises ``ValueError``."""
        module = load_module("playwright04", PW_LISTEN_SCRIPT)
        response = FakePlaywrightResponse(
            "https://www.douyin.com/aweme/v1/web/aweme/post/",
            payload=ValueError("bad json"),
        )

        self.assertIsNone(module.try_read_json_payload(response))

    def test_build_output_path_uses_playwright_output_directory(self) -> None:
        """``build_output_path`` places the file in the Playwright output directory."""
        module = load_module("playwright05", PW_DOWNLOAD_SCRIPT)

        output_path = module.build_output_path("示例标题", "123456")

        self.assertEqual(
            output_path.as_posix(),
            "learning/playwright_basics/output/示例标题-123456.mp4",
        )

    def test_extract_first_item_from_payload_uses_existing_parser(self) -> None:
        """``extract_first_item_from_payload`` returns the first item as a title/id/url dict."""
        module = load_module("playwright05", PW_DOWNLOAD_SCRIPT)
        payload = {
            "aweme_list": [
                {
                    "aweme_id": "7619989983668240802",
                    "desc": "Playwright 示例",
                    "video": {
                        "play_addr": {
                            "url_list": ["https://v26-web.douyinvod.com/example/single.mp4"]
                        }
                    },
                }
            ]
        }

        item = module.extract_first_item_from_payload(payload)

        self.assertEqual(
            item,
            {
                "title": "Playwright 示例",
                "video_id": "7619989983668240802",
                "video_url": "https://v26-web.douyinvod.com/example/single.mp4",
            },
        )


if __name__ == "__main__":
    unittest.main()