# File listing metadata: 179 lines, 7.6 KiB, Python.
import importlib
|
|
import unittest
|
|
from unittest import mock
|
|
|
|
|
|
class FakeResponse:
    """Minimal stand-in for a response object used by the payload tests.

    It carries only two attributes, ``body`` and ``raw_body``, which the
    tests in this file populate before passing the instance to
    ``extract_feed_payload``.
    """

    def __init__(self, body: object, raw_body: object) -> None:
        """Store both values verbatim.

        Args:
            body: Value exposed as ``self.body`` (the tests pass a dict or
                an empty string).
            raw_body: Value exposed as ``self.raw_body`` (the tests pass a
                JSON string or an empty string).
        """
        # No validation, copying or decoding: the fake hands back exactly
        # what the test supplied.
        self.body = body
        self.raw_body = raw_body
|
|
|
|
|
|
class XhsModuleTests(unittest.TestCase):
    """Tests for the helper functions and CLI entry point of the ``XHS`` module.

    The module is imported by name inside each test rather than at the top
    of the file, so a broken import is reported by the individual tests.
    """

    @staticmethod
    def _load_module():
        # Single place that names the module under test. It is resolved
        # lazily on every call, exactly like a per-test import would be.
        return importlib.import_module("XHS")

    def test_module_can_import_without_optional_runtime_dependencies(self) -> None:
        # Only the import itself is exercised here.
        module = self._load_module()
        self.assertIsNotNone(module)

    def test_sanitize_filename_removes_invalid_characters(self) -> None:
        module = self._load_module()
        # Each of / : * ? " < > | maps to "_" in the expected name, and the
        # trailing newline leaves no character behind.
        self.assertEqual(
            module.sanitize_filename('a/b:c*?d"e<f>g|h\n'),
            "a_b_c__d_e_f_g_h",
        )

    def test_truncate_utf8_bytes_keeps_valid_utf8(self) -> None:
        module = self._load_module()
        # These characters take 3 bytes each in UTF-8, and the 20-byte limit
        # is not a multiple of 3.
        result = module.truncate_utf8_bytes("测试标题" * 20, 20)
        self.assertLessEqual(len(result.encode("utf-8")), 20)
        # Encoding once more must not raise (it would for text that cannot
        # be represented in UTF-8, such as lone surrogates).
        result.encode("utf-8")

    def test_choose_video_url_prefers_master_url(self) -> None:
        module = self._load_module()
        # The backup entry comes first, so simply taking the first element
        # would not satisfy the assertion below.
        candidates = [
            module.VideoCandidate(
                video_id="note-1",
                title="标题",
                video_url="https://example.com/backup.mp4",
                author_name="作者",
                source_key="backup_urls",
            ),
            module.VideoCandidate(
                video_id="note-1",
                title="标题",
                video_url="https://sns-video.xhscdn.com/master.mp4",
                author_name="作者",
                source_key="master_url",
            ),
        ]
        chosen = module.choose_video_candidate(candidates)
        self.assertEqual(chosen.video_url, "https://sns-video.xhscdn.com/master.mp4")

    def test_group_video_candidates_keeps_one_preferred_candidate_per_video_id(self) -> None:
        module = self._load_module()
        # Two entries share "note-1"; a third belongs to "note-2".
        candidates = [
            module.VideoCandidate("note-1", "标题", "https://example.com/backup.mp4", "作者", "backup_urls"),
            module.VideoCandidate("note-1", "标题", "https://example.com/master.mp4", "作者", "master_url"),
            module.VideoCandidate("note-2", "标题2", "https://example.com/two.mp4", "作者", "master_url"),
        ]
        grouped = module.group_video_candidates(candidates)
        # One result per video id, in first-seen order, with the master URL
        # kept for the duplicated id.
        self.assertEqual([item.video_id for item in grouped], ["note-1", "note-2"])
        self.assertEqual(grouped[0].video_url, "https://example.com/master.mp4")

    def test_extract_video_candidates_from_nested_feed_payload(self) -> None:
        module = self._load_module()
        # One feed item whose single h264 stream has a master URL and one
        # backup URL.
        stream_entry = {
            "master_url": "https://sns-video.xhscdn.com/stream/a.mp4?sign=1",
            "backup_urls": [
                "https://sns-video.xhscdn.com/stream/a-backup.mp4?sign=1"
            ],
        }
        payload = {
            "data": {
                "items": [
                    {
                        "id": "note123",
                        "note_card": {
                            "display_title": "海边日落",
                            "user": {"nickname": "摄影师"},
                            "video": {
                                "media": {
                                    "stream": {
                                        "h264": [stream_entry],
                                    }
                                }
                            },
                        },
                    }
                ]
            }
        }
        candidates = module.extract_video_candidates(payload)
        # Two URLs in the payload -> two candidates, master URL first.
        self.assertEqual(len(candidates), 2)
        first = candidates[0]
        self.assertEqual(first.video_id, "note123")
        self.assertEqual(first.title, "海边日落")
        self.assertEqual(first.author_name, "摄影师")
        self.assertEqual(first.source_key, "master_url")

    def test_build_output_path_uses_author_title_and_video_id(self) -> None:
        module = self._load_module()
        candidate = module.VideoCandidate(
            video_id="note123",
            title="海边日落",
            video_url="https://sns-video.xhscdn.com/a.mp4",
            author_name="摄影师",
            source_key="master_url",
        )
        output_path = module.build_output_path(candidate)
        # Compared via as_posix() so the expectation is separator-independent.
        self.assertEqual(output_path.as_posix(), "video/[摄影师]海边日落-note123.mp4")

    def test_build_browser_address_from_port(self) -> None:
        module = self._load_module()
        self.assertEqual(module.build_browser_address(9224), "127.0.0.1:9224")
        # No port means no address.
        self.assertIsNone(module.build_browser_address(None))

    def test_ensure_browser_debug_port_ready_accepts_open_port(self) -> None:
        module = self._load_module()
        # Let the fake connection work as a context manager that yields
        # itself and does not suppress exceptions.
        connection = mock.MagicMock()
        connection.__enter__.return_value = connection
        connection.__exit__.return_value = False
        with mock.patch.object(
            module.socket, "create_connection", return_value=connection
        ) as mocked_connect:
            module.ensure_browser_debug_port_ready(9224)
        mocked_connect.assert_called_once()

    def test_ensure_browser_debug_port_ready_rejects_closed_port(self) -> None:
        module = self._load_module()
        # A failing connect attempt must surface as a RuntimeError whose
        # message matches "login_xhs.py".
        with mock.patch.object(module.socket, "create_connection", side_effect=OSError("boom")):
            with self.assertRaisesRegex(RuntimeError, "login_xhs.py"):
                module.ensure_browser_debug_port_ready(9224)

    def test_extract_feed_payload_uses_dict_body(self) -> None:
        module = self._load_module()
        expected = {"data": {"items": []}}
        response = FakeResponse({"data": {"items": []}}, "")
        self.assertEqual(module.extract_feed_payload(response), expected)

    def test_extract_feed_payload_falls_back_to_raw_json(self) -> None:
        module = self._load_module()
        # Empty body, JSON text in raw_body: the parsed raw text is expected.
        response = FakeResponse("", '{"data": {"items": [{"id": "1"}]}}')
        self.assertEqual(
            module.extract_feed_payload(response),
            {"data": {"items": [{"id": "1"}]}},
        )

    def test_build_parser_uses_expected_defaults(self) -> None:
        module = self._load_module()
        # Parsing an empty argument list exposes the defaults.
        args = module.build_parser().parse_args([])
        self.assertEqual(args.max_videos, 10)
        self.assertEqual(args.output_dir, "video")
        self.assertEqual(args.browser_port, 9224)
        self.assertEqual(args.timeout, 20)
        self.assertEqual(args.start_url, module.DEFAULT_EXPLORE_URL)

    def test_main_invokes_collect_videos_with_cli_values(self) -> None:
        module = self._load_module()
        cli_args = [
            "--max-videos",
            "3",
            "--output-dir",
            "downloads",
            "--browser-port",
            "9334",
            "--timeout",
            "7",
            "--start-url",
            "https://www.xiaohongshu.com/explore",
        ]
        with mock.patch.object(module, "collect_videos", return_value=3) as mocked_collect:
            exit_code = module.main(cli_args)
        # The mocked return value of 3 must not become the exit code.
        self.assertEqual(exit_code, 0)
        mocked_collect.assert_called_once()
        _, kwargs = mocked_collect.call_args
        self.assertEqual(kwargs["max_videos"], 3)
        self.assertEqual(kwargs["output_dir"].as_posix(), "downloads")
        self.assertEqual(kwargs["browser_port"], 9334)
        self.assertEqual(kwargs["timeout"], 7)
        # NOTE(review): --start-url is supplied above but its forwarded value
        # is not asserted; confirm how main() passes it on before adding a check.
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the tests in this file when it is executed directly as a script.
    unittest.main()
|