# 412 lines, 18 KiB, Python
import importlib
|
|
import io
|
|
import unittest
|
|
from contextlib import redirect_stdout
|
|
from unittest import mock
|
|
|
|
|
|
class FakeResponse:
    """Response stand-in exposing a parsed ``body`` and a ``raw_body``."""

    def __init__(self, body, raw_body):
        # Store both representations exactly as supplied by the test.
        self.body, self.raw_body = body, raw_body
|
|
|
|
|
|
class FakePage:
    """Page stand-in that only carries the ``url`` it was created with."""

    def __init__(self, url: str):
        # The tests in this file read nothing but ``url`` from this object.
        self.url = url
|
|
|
|
|
|
class FakePacketResponse:
    """Packet response stand-in with a given ``body`` and an empty ``raw_body``."""

    def __init__(self, body):
        # ``raw_body`` is always the empty string for this fake.
        self.body, self.raw_body = body, ""
|
|
|
|
|
|
class FakePacket:
    """Packet stand-in whose ``response`` wraps the supplied body."""

    def __init__(self, body):
        # Wrap the body so callers can read ``packet.response.body``.
        wrapped = FakePacketResponse(body)
        self.response = wrapped
|
|
|
|
|
|
class FakeListener:
    """Listener stand-in that records started targets and serves one preset packet."""

    def __init__(self, packet):
        self.started_targets = []
        self.packet = packet

    def start(self, target):
        """Record ``target`` in ``started_targets``."""
        self.started_targets += [target]

    def wait(self, timeout):
        """Return the preset packet; ``timeout`` is accepted but not used."""
        preset = self.packet
        return preset
|
|
|
|
|
|
class FakeRuntimePage:
    """Page stand-in that tracks navigation and rejects any script execution."""

    def __init__(self, url: str, packet):
        self.url = url
        # The listener hands back ``packet`` from every ``wait`` call.
        self.listen = FakeListener(packet)
        self.visited_urls = []

    def get(self, url):
        """Record ``url`` as visited and make it the current ``url``."""
        self.url = url
        self.visited_urls.append(url)

    def run_js(self, script):
        """Always raise ``AssertionError``; no script is expected on this page."""
        message = f"unexpected scroll script: {script}"
        raise AssertionError(message)
|
|
|
|
|
|
class DouyinModuleTests(unittest.TestCase):
    """Tests for the ``Douyin`` module, imported by name inside each test.

    Collaborators such as ``create_page``, ``download_video`` and
    ``socket.create_connection`` are replaced with fakes or mocks where a
    test needs them.
    """

    # Fixture values reused across several tests.
    _AWEME_ID = "7619989983668240802"
    _CREATOR_URL = "https://www.douyin.com/user/MS4wLjABAAAAexample?from_tab_name=main"
    _VIDEO_URL = "https://www.douyin.com/video/7619989983668240802"

    @staticmethod
    def _single_video_packet():
        """Return a fake packet whose body carries one ``aweme_detail`` entry."""
        detail = {
            "aweme_id": "7619989983668240802",
            "desc": "单视频页面",
            "video": {
                "play_addr": {
                    "url_list": ["https://v26-web.douyinvod.com/example/single.mp4"]
                }
            },
        }
        return FakePacket({"aweme_detail": detail})

    @staticmethod
    def _run_collect_single_video(douyin, page, target):
        """Call ``collect_single_video`` with its collaborators patched.

        ``import_runtime_dependencies``, ``create_page`` and ``download_video``
        are patched on ``douyin`` for the duration of the call. Returns the
        call's result together with the ``download_video`` mock.
        """
        stub_imports = mock.patch.object(
            douyin, "import_runtime_dependencies", return_value=(object(), object(), object())
        )
        stub_page = mock.patch.object(douyin, "create_page", return_value=page)
        with stub_imports, stub_page, mock.patch.object(douyin, "download_video") as mocked_download:
            downloaded = douyin.collect_single_video(
                target=target,
                timeout=10,
                output_dir=douyin.Path("video"),
                browser_port=None,
            )
        return downloaded, mocked_download

    def test_module_can_import_without_optional_runtime_dependencies(self) -> None:
        # Importing by name must succeed and produce a module object.
        self.assertIsNotNone(importlib.import_module("Douyin"))

    def test_sanitize_filename_removes_invalid_characters(self) -> None:
        douyin = importlib.import_module("Douyin")
        cleaned = douyin.sanitize_filename('a/b:c*?d"e<f>g|h\n')
        self.assertEqual(cleaned, "a_b_c__d_e_f_g_h")

    def test_choose_video_url_prefers_douyinvod_link(self) -> None:
        douyin = importlib.import_module("Douyin")
        candidates = [
            "https://www.douyin.com/aweme/v1/play/?video_id=123",
            "https://v11-weba.douyinvod.com/example/video.mp4",
            "https://v26-web.douyinvod.com/example/video.mp4",
        ]
        chosen = douyin.choose_video_url(candidates)
        self.assertEqual(chosen, "https://v11-weba.douyinvod.com/example/video.mp4")

    def test_build_output_path_uses_video_directory(self) -> None:
        douyin = importlib.import_module("Douyin")
        destination = douyin.build_output_path("测试标题", "123456")
        self.assertEqual(destination.as_posix(), "video/测试标题-123456.mp4")

    def test_extract_aweme_payload_uses_dict_body(self) -> None:
        douyin = importlib.import_module("Douyin")
        dict_response = FakeResponse({"aweme_list": []}, "")
        self.assertEqual(douyin.extract_aweme_payload(dict_response), {"aweme_list": []})

    def test_extract_aweme_payload_falls_back_to_raw_json(self) -> None:
        douyin = importlib.import_module("Douyin")
        raw_response = FakeResponse("", '{"aweme_list": [{"aweme_id": "1"}]}')
        payload = douyin.extract_aweme_payload(raw_response)
        self.assertEqual(payload, {"aweme_list": [{"aweme_id": "1"}]})

    def test_build_browser_address_from_port(self) -> None:
        douyin = importlib.import_module("Douyin")
        self.assertEqual(douyin.build_browser_address(9223), "127.0.0.1:9223")
        self.assertIsNone(douyin.build_browser_address(None))

    def test_ensure_browser_debug_port_ready_accepts_open_port(self) -> None:
        douyin = importlib.import_module("Douyin")
        # A MagicMock that also works as a context manager yielding itself.
        fake_socket = mock.MagicMock()
        fake_socket.__enter__.return_value = fake_socket
        fake_socket.__exit__.return_value = False
        open_port = mock.patch.object(douyin.socket, "create_connection", return_value=fake_socket)
        with open_port as mocked_connect:
            douyin.ensure_browser_debug_port_ready(9223)
        mocked_connect.assert_called_once()

    def test_ensure_browser_debug_port_ready_rejects_closed_port(self) -> None:
        douyin = importlib.import_module("Douyin")
        closed_port = mock.patch.object(douyin.socket, "create_connection", side_effect=OSError("boom"))
        with closed_port, self.assertRaisesRegex(RuntimeError, "login_douyin.py"):
            douyin.ensure_browser_debug_port_ready(9223)

    def test_is_recommendation_url_accepts_douyin_homepage(self) -> None:
        douyin = importlib.import_module("Douyin")
        accepted = (
            "https://www.douyin.com/",
            "https://www.douyin.com",
            "https://www.douyin.com/?from=web",
        )
        rejected = (
            "https://www.douyin.com/user/xxx",
            "https://www.douyin.com/video/123",
        )
        for url in accepted:
            self.assertTrue(douyin.is_recommendation_url(url))
        for url in rejected:
            self.assertFalse(douyin.is_recommendation_url(url))

    def test_is_creator_url_accepts_supported_douyin_creator_url(self) -> None:
        douyin = importlib.import_module("Douyin")
        self.assertTrue(douyin.is_creator_url(self._CREATOR_URL))
        self.assertFalse(douyin.is_creator_url(self._VIDEO_URL))

    def test_is_video_url_accepts_supported_douyin_video_url(self) -> None:
        douyin = importlib.import_module("Douyin")
        self.assertTrue(douyin.is_video_url(self._VIDEO_URL))
        self.assertFalse(douyin.is_video_url(self._CREATOR_URL))

    def test_is_aweme_id_accepts_numeric_identifier(self) -> None:
        douyin = importlib.import_module("Douyin")
        self.assertTrue(douyin.is_aweme_id(self._AWEME_ID))
        self.assertFalse(douyin.is_aweme_id("not-an-aweme-id"))

    def test_parse_target_input_classifies_recommendation_url(self) -> None:
        douyin = importlib.import_module("Douyin")
        parsed = douyin.parse_target_input("https://www.douyin.com/", source="manual")
        self.assertEqual(parsed.kind, "recommendation")
        self.assertEqual(parsed.value, "https://www.douyin.com/")
        self.assertEqual(parsed.source, "manual")

    def test_parse_target_input_classifies_creator_url(self) -> None:
        douyin = importlib.import_module("Douyin")
        parsed = douyin.parse_target_input(self._CREATOR_URL, source="manual")
        self.assertEqual(parsed.kind, "creator")
        self.assertEqual(parsed.value, self._CREATOR_URL)
        self.assertEqual(parsed.source, "manual")

    def test_parse_target_input_classifies_video_url(self) -> None:
        douyin = importlib.import_module("Douyin")
        parsed = douyin.parse_target_input(self._VIDEO_URL, source="manual")
        self.assertEqual(parsed.kind, "single-video")
        self.assertEqual(parsed.aweme_id, self._AWEME_ID)
        self.assertEqual(parsed.source, "manual")

    def test_parse_target_input_classifies_aweme_id(self) -> None:
        douyin = importlib.import_module("Douyin")
        parsed = douyin.parse_target_input(self._AWEME_ID, source="manual")
        self.assertEqual(parsed.kind, "single-video")
        self.assertEqual(parsed.value, self._AWEME_ID)
        self.assertEqual(parsed.aweme_id, self._AWEME_ID)

    def test_resolve_target_uses_current_page_when_cli_target_is_absent(self) -> None:
        douyin = importlib.import_module("Douyin")
        resolved = douyin.resolve_target(page=FakePage(self._CREATOR_URL), cli_target=None)
        self.assertEqual(resolved.kind, "creator")
        self.assertEqual(resolved.source, "current-page")

    def test_resolve_target_raises_readable_error_when_current_page_is_unsupported(self) -> None:
        douyin = importlib.import_module("Douyin")
        with self.assertRaisesRegex(RuntimeError, "手动传入链接或 `aweme_id`"):
            douyin.resolve_target(page=FakePage("https://www.example.com/"), cli_target=None)

    def test_resolve_target_raises_readable_error_when_manual_input_is_unsupported(self) -> None:
        douyin = importlib.import_module("Douyin")
        with self.assertRaisesRegex(RuntimeError, "不支持的目标"):
            douyin.resolve_target(page=FakePage(self._VIDEO_URL), cli_target="abc")

    def test_collect_videos_does_not_auto_scroll_when_processing_current_page_only(self) -> None:
        douyin = importlib.import_module("Douyin")
        aweme = {
            "aweme_id": "7619989983668240802",
            "desc": "当前页视频",
            "video": {
                "play_addr": {
                    "url_list": ["https://v26-web.douyinvod.com/example/video.mp4"]
                }
            },
        }
        page = FakeRuntimePage(self._CREATOR_URL, FakePacket({"aweme_list": [aweme]}))
        stub_imports = mock.patch.object(
            douyin, "import_runtime_dependencies", return_value=(object(), object(), object())
        )
        stub_page = mock.patch.object(douyin, "create_page", return_value=page)
        stub_download = mock.patch.object(douyin, "download_video")
        stub_scroll = mock.patch.object(douyin, "scroll_to_next_page")
        with stub_imports, stub_page, stub_download, stub_scroll as mocked_scroll:
            downloaded = douyin.collect_videos(
                user_url=self._CREATOR_URL,
                max_pages=1,
                timeout=10,
                output_dir=douyin.Path("video"),
                browser_port=None,
            )
        self.assertEqual(downloaded, 1)
        mocked_scroll.assert_not_called()

    def test_collect_videos_raises_readable_error_when_no_aweme_items_are_available(self) -> None:
        douyin = importlib.import_module("Douyin")
        page = FakeRuntimePage(self._CREATOR_URL, FakePacket({"aweme_list": []}))
        stub_imports = mock.patch.object(
            douyin, "import_runtime_dependencies", return_value=(object(), object(), object())
        )
        stub_page = mock.patch.object(douyin, "create_page", return_value=page)
        stub_download = mock.patch.object(douyin, "download_video")
        with stub_imports, stub_page, stub_download:
            with self.assertRaisesRegex(RuntimeError, "当前页面未加载出可用作品数据"):
                douyin.collect_videos(
                    user_url=self._CREATOR_URL,
                    max_pages=1,
                    timeout=10,
                    output_dir=douyin.Path("video"),
                    browser_port=None,
                )

    def test_parse_aweme_items_extracts_author_info(self) -> None:
        douyin = importlib.import_module("Douyin")
        aweme = {
            "aweme_id": "7619989983668240802",
            "desc": "测试视频",
            "author": {
                "nickname": "测试博主",
                "uid": "123456789"
            },
            "video": {
                "play_addr": {
                    "url_list": ["https://v26-web.douyinvod.com/example/video.mp4"]
                }
            },
        }
        items = douyin.parse_aweme_items({"aweme_list": [aweme]})
        self.assertEqual(len(items), 1)
        first = items[0]
        self.assertEqual(first["author_name"], "测试博主")
        self.assertEqual(items[0]["author_id"], "123456789")

    def test_build_video_page_url_uses_aweme_id(self) -> None:
        douyin = importlib.import_module("Douyin")
        self.assertEqual(douyin.build_video_page_url(self._AWEME_ID), self._VIDEO_URL)

    def test_collect_single_video_downloads_exactly_one_file_for_video_url_target(self) -> None:
        douyin = importlib.import_module("Douyin")
        page = FakeRuntimePage(self._VIDEO_URL, self._single_video_packet())
        target = douyin.ResolvedTarget(
            kind="single-video",
            value=self._VIDEO_URL,
            source="manual",
            aweme_id=self._AWEME_ID,
        )
        downloaded, mocked_download = self._run_collect_single_video(douyin, page, target)
        self.assertEqual(downloaded, 1)
        self.assertEqual(page.visited_urls, [self._VIDEO_URL])
        mocked_download.assert_called_once()

    def test_collect_single_video_downloads_exactly_one_file_for_aweme_id_target(self) -> None:
        douyin = importlib.import_module("Douyin")
        # The page starts elsewhere; the target carries only the bare aweme id.
        page = FakeRuntimePage("about:blank", self._single_video_packet())
        target = douyin.ResolvedTarget(
            kind="single-video",
            value=self._AWEME_ID,
            source="manual",
            aweme_id=self._AWEME_ID,
        )
        downloaded, mocked_download = self._run_collect_single_video(douyin, page, target)
        self.assertEqual(downloaded, 1)
        self.assertEqual(page.visited_urls, [self._VIDEO_URL])
        mocked_download.assert_called_once()

    def test_build_parser_defaults_to_zero_argument_current_page_flow(self) -> None:
        douyin = importlib.import_module("Douyin")
        parsed_args = douyin.build_parser().parse_args([])
        self.assertIsNone(parsed_args.target)
        self.assertEqual(parsed_args.browser_port, 9223)
        self.assertEqual(parsed_args.pages, 1)

    def test_resolve_cli_target_prefers_manual_target_without_attaching_browser(self) -> None:
        douyin = importlib.import_module("Douyin")
        with mock.patch.object(douyin, "import_runtime_dependencies") as mocked_imports:
            resolved = douyin.resolve_cli_target(self._AWEME_ID, browser_port=9223)
        self.assertEqual(resolved.kind, "single-video")
        self.assertEqual(resolved.aweme_id, self._AWEME_ID)
        mocked_imports.assert_not_called()

    def test_main_without_target_dispatches_current_page_creator_flow(self) -> None:
        douyin = importlib.import_module("Douyin")
        captured = io.StringIO()
        creator_target = douyin.ResolvedTarget(
            kind="creator",
            value=self._CREATOR_URL,
            source="current-page",
        )
        stub_resolve = mock.patch.object(douyin, "resolve_cli_target", return_value=creator_target)
        stub_collect = mock.patch.object(douyin, "collect_videos", return_value=2)
        with redirect_stdout(captured), stub_resolve, stub_collect as mocked_collect:
            exit_code = douyin.main([])
        self.assertEqual(exit_code, 0)
        mocked_collect.assert_called_once_with(
            user_url=self._CREATOR_URL,
            max_pages=1,
            timeout=10,
            output_dir=douyin.Path("video"),
            browser_port=9223,
            auto_scroll=False,
        )
        self.assertIn("处理结束,共下载 2 个视频", captured.getvalue())

    def test_main_returns_fallback_hint_when_current_page_is_unsupported(self) -> None:
        douyin = importlib.import_module("Douyin")
        captured = io.StringIO()
        failing_resolve = mock.patch.object(
            douyin,
            "resolve_cli_target",
            side_effect=RuntimeError("请切到目标页面后重试,或手动传入链接或 `aweme_id`。"),
        )
        with redirect_stdout(captured), failing_resolve:
            exit_code = douyin.main([])
        self.assertEqual(exit_code, 1)
        self.assertIn("手动传入链接或 `aweme_id`", captured.getvalue())
|
|
|
|
|
|
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|