From 572ff95aa9157081d0afc25656958aabf878031d Mon Sep 17 00:00:00 2001 From: wxs Date: Fri, 13 Mar 2026 16:18:03 +0800 Subject: [PATCH] =?UTF-8?q?refactor(pugongying):=20=E5=88=A0=E9=99=A4?= =?UTF-8?q?=E4=B8=8D=E7=A8=B3=E5=AE=9A=E6=8E=A5=E5=8F=A3=E5=92=8C=E6=97=A0?= =?UTF-8?q?=E5=85=B3=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 删除了对 data_summary 和 fans_summary 这两个不稳定接口的请求, 同时删除了这两个接口对应的字段映射。另外删除了与脚本无关的 package.json、核心模块源码和测试文件。 Co-Authored-By: Claude Opus 4.6 --- pugongying/package-lock.json | 118 ----- pugongying/package.json | 12 - pugongying/src/xhs-pgy-export-core.js | 621 -------------------------- pugongying/test/userscript.test.js | 529 ---------------------- pugongying/xhs-pgy-export.user.js | 30 -- 5 files changed, 1310 deletions(-) delete mode 100644 pugongying/package-lock.json delete mode 100644 pugongying/package.json delete mode 100644 pugongying/src/xhs-pgy-export-core.js delete mode 100644 pugongying/test/userscript.test.js diff --git a/pugongying/package-lock.json b/pugongying/package-lock.json deleted file mode 100644 index d187a5b..0000000 --- a/pugongying/package-lock.json +++ /dev/null @@ -1,118 +0,0 @@ -{ - "name": "browser-script", - "version": "1.0.0", - "lockfileVersion": 3, - "requires": true, - "packages": { - "": { - "name": "browser-script", - "version": "1.0.0", - "dependencies": { - "xlsx": "^0.18.5" - } - }, - "node_modules/adler-32": { - "version": "1.3.1", - "resolved": "https://registry.npmjs.org/adler-32/-/adler-32-1.3.1.tgz", - "integrity": "sha512-ynZ4w/nUUv5rrsR8UUGoe1VC9hZj6V5hU9Qw1HlMDJGEJw5S7TfTErWTjMys6M7vr0YWcPqs3qAr4ss0nDfP+A==", - "license": "Apache-2.0", - "engines": { - "node": ">=0.8" - } - }, - "node_modules/cfb": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/cfb/-/cfb-1.2.2.tgz", - "integrity": "sha512-KfdUZsSOw19/ObEWasvBP/Ac4reZvAGauZhs6S/gqNhXhI7cKwvlH7ulj+dOEYnca4bm4SGo8C1bTAQvnTjgQA==", - "license": "Apache-2.0", - "dependencies": { - "adler-32": "~1.3.0", - "crc-32": "~1.2.0" - }, - "engines": { - "node": ">=0.8" - } - }, - "node_modules/codepage": { - "version": "1.15.0", - "resolved": "https://registry.npmjs.org/codepage/-/codepage-1.15.0.tgz", - "integrity": "sha512-3g6NUTPd/YtuuGrhMnOMRjFc+LJw/bnMp3+0r/Wcz3IXUuCosKRJvMphm5+Q+bvTVGcJJuRvVLuYba+WojaFaA==", - "license": "Apache-2.0", - "engines": { - "node": ">=0.8" - } - }, - "node_modules/crc-32": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/crc-32/-/crc-32-1.2.2.tgz", - "integrity": "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ==", - "license": "Apache-2.0", - "bin": { - "crc32": "bin/crc32.njs" - }, - "engines": { - "node": ">=0.8" - } - }, - "node_modules/frac": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/frac/-/frac-1.1.2.tgz", - "integrity": "sha512-w/XBfkibaTl3YDqASwfDUqkna4Z2p9cFSr1aHDt0WoMTECnRfBOv2WArlZILlqgWlmdIlALXGpM2AOhEk5W3IA==", - "license": "Apache-2.0", - "engines": { - "node": ">=0.8" - } - }, - "node_modules/ssf": { - "version": "0.11.2", - "resolved": "https://registry.npmjs.org/ssf/-/ssf-0.11.2.tgz", - "integrity": "sha512-+idbmIXoYET47hH+d7dfm2epdOMUDjqcB4648sTZ+t2JwoyBFL/insLfB/racrDmsKB3diwsDA696pZMieAC5g==", - "license": "Apache-2.0", - "dependencies": { - "frac": "~1.1.2" - }, - "engines": { - "node": ">=0.8" - } - }, - "node_modules/wmf": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/wmf/-/wmf-1.0.2.tgz", - "integrity": "sha512-/p9K7bEh0Dj6WbXg4JG0xvLQmIadrner1bi45VMJTfnbVHsc7yIajZyoSoK60/dtVBs12Fm6WkUI5/3WAVsNMw==", - "license": "Apache-2.0", - "engines": { - "node": ">=0.8" - } - }, - "node_modules/word": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/word/-/word-0.3.0.tgz", - "integrity": "sha512-OELeY0Q61OXpdUfTp+oweA/vtLVg5VDOXh+3he3PNzLGG/y0oylSOC1xRVj0+l4vQ3tj/bB1HVHv1ocXkQceFA==", - "license": "Apache-2.0", - "engines": { - "node": ">=0.8" - } - }, - "node_modules/xlsx": { - "version": "0.18.5", - "resolved": "https://registry.npmjs.org/xlsx/-/xlsx-0.18.5.tgz", - "integrity": "sha512-dmg3LCjBPHZnQp5/F/+nnTa+miPJxUXB6vtk42YjBBKayDNagxGEeIdWApkYPOf3Z3pm3k62Knjzp7lMeTEtFQ==", - "license": "Apache-2.0", - "dependencies": { - "adler-32": "~1.3.0", - "cfb": "~1.2.1", - "codepage": "~1.15.0", - "crc-32": "~1.2.1", - "ssf": "~0.11.2", - "wmf": "~1.0.1", - "word": "~0.3.0" - }, - "bin": { - "xlsx": "bin/xlsx.njs" - }, - "engines": { - "node": ">=0.8" - } - } - } -} diff --git a/pugongying/package.json b/pugongying/package.json deleted file mode 100644 index cc67fd1..0000000 --- a/pugongying/package.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "name": "browser-script", - "version": "1.0.0", - "private": true, - "dependencies": { - "xlsx": "^0.18.5" - }, - "scripts": { - "test": "node --test", - "test:coverage": "node --test --experimental-test-coverage" - } -} diff --git a/pugongying/src/xhs-pgy-export-core.js b/pugongying/src/xhs-pgy-export-core.js deleted file mode 100644 index 8422141..0000000 --- a/pugongying/src/xhs-pgy-export-core.js +++ /dev/null @@ -1,621 +0,0 @@ -const API_BASE = - "https://pgy.xiaohongshu.com/api/solar/cooperator/user/blogger/"; -const SUPPLEMENTAL_ENDPOINTS = [ - { - namespace: "dataSummary", - buildUrl: (userId) => - `https://pgy.xiaohongshu.com/api/pgy/kol/data/data_summary?userId=${encodeURIComponent( - userId, - )}&business=1`, - }, - { - namespace: "fansSummary", - buildUrl: (userId) => - `https://pgy.xiaohongshu.com/api/solar/kol/data_v3/fans_summary?userId=${encodeURIComponent( - userId, - )}`, - }, - { - namespace: "fansProfile", - buildUrl: (userId) => - `https://pgy.xiaohongshu.com/api/solar/kol/data/${encodeURIComponent( - userId, - )}/fans_profile`, - }, -]; -const NAMESPACE_LABEL_MAP = { - dataSummary: "数据概览", - fansSummary: "粉丝概览", - fansProfile: "粉丝画像", -}; - -const FIELD_LABEL_MAP = { - id: "ID", - "metrics.fans": "粉丝数", - dataSummary: "数据概览", - fansSummary: "粉丝概览", - fansProfile: "粉丝画像", - "dataSummary.fans30GrowthRate": "近30天涨粉率", - "dataSummary.mAccumImpCompare": "曝光中位数超越率", - "dataSummary.noteType": "笔记内容类型", - "dataSummary.activeDayInLast7": "近7天活跃天数", - "dataSummary.responseRate": "响应率", - "dataSummary.avgRead": "平均阅读量", - "fansProfile.ages": "粉丝年龄分布", - "fansProfile.gender.male": "粉丝男性占比", - "fansProfile.gender.female": "粉丝女性占比", - "fansProfile.interests": "粉丝兴趣分布", - "fansProfile.provinces": "粉丝省份分布", - "fansProfile.cities": "粉丝城市分布", - "fansProfile.devices": "粉丝设备分布", - "fansProfile.dateKey": "画像日期", - "fansSummary.fansNum": "粉丝总数", - "fansSummary.fansIncreaseNum": "涨粉数", - "fansSummary.fansGrowthRate": "粉丝增长率", - "fansSummary.fansGrowthBeyondRate": "粉丝增长超越率", - "fansSummary.activeFansL28": "近28天活跃粉丝数", - "fansSummary.activeFansRate": "活跃粉丝占比", - "fansSummary.activeFansBeyondRate": "活跃粉丝超越率", - "fansSummary.engageFansRate": "互动粉丝占比", - "fansSummary.engageFansL30": "近30天互动粉丝数", - "fansSummary.engageFansBeyondRate": "互动粉丝超越率", - "fansSummary.readFansIn30": "近30天阅读粉丝数", - "fansSummary.readFansRate": "阅读粉丝占比", - "fansSummary.readFansBeyondRate": "阅读粉丝超越率", - "fansSummary.payFansUserRate30d": "近30天支付粉丝占比", - "fansSummary.payFansUserNum30d": "近30天支付粉丝数", - userId: "达人ID", - fansCount: "粉丝数", - name: "达人昵称", - redId: "小红书号", - location: "地区", - travelAreaList: "常驻地区", - personalTags: "人设标签", - contentTags: "内容标签", - likeCollectCountInfo: "获赞与收藏", - businessNoteCount: "商业笔记数", - totalNoteCount: "总笔记数", - picturePrice: "图文报价", - videoPrice: "视频报价", - lowerPrice: "最低报价", - userType: "用户类型", - tradeType: "合作行业", - clickMidNum: "阅读中位数", - accumCoopImpMedinNum30d: "近30天合作曝光中位数", - mEngagementNum: "互动中位数", - "clothingIndustryPrice.picturePrice": "服饰行业图文报价", -}; - -function isPlainObject(value) { - return Object.prototype.toString.call(value) === "[object Object]"; -} - -function normalizeScalar(value) { - if (value === null || value === undefined) { - return ""; - } - if (typeof value === "string") { - return value.trim(); - } - if ( - typeof value === "number" || - typeof value === "boolean" || - typeof value === "bigint" - ) { - return String(value); - } - if (value instanceof Date) { - return value.toISOString(); - } - return String(value); -} - -function summarizeArray(list) { - if (!Array.isArray(list) || list.length === 0) { - return ""; - } - const allScalar = list.every( - (item) => - item === null || - item === undefined || - ["string", "number", "boolean", "bigint"].includes(typeof item), - ); - if (allScalar) { - return list.map(normalizeScalar).filter(Boolean).join(" | "); - } - return list - .map((item) => { - if (isPlainObject(item) || Array.isArray(item)) { - try { - return JSON.stringify(item); - } catch (error) { - return String(item); - } - } - return normalizeScalar(item); - }) - .filter(Boolean) - .join(" | "); -} - -function flattenRecord(record, prefix, target) { - const baseTarget = target || {}; - const currentPrefix = prefix || ""; - - if (!isPlainObject(record)) { - if (currentPrefix) { - baseTarget[currentPrefix] = normalizeScalar(record); - } - return baseTarget; - } - - const keys = Object.keys(record); - if (keys.length === 0 && currentPrefix) { - baseTarget[currentPrefix] = ""; - return baseTarget; - } - - for (const key of keys) { - const nextPath = currentPrefix ? `${currentPrefix}.${key}` : key; - const value = record[key]; - - if (Array.isArray(value)) { - baseTarget[nextPath] = summarizeArray(value); - continue; - } - - if (isPlainObject(value)) { - flattenRecord(value, nextPath, baseTarget); - continue; - } - - baseTarget[nextPath] = normalizeScalar(value); - } - - return baseTarget; -} - -function extractBloggerId(value) { - const raw = normalizeScalar(value); - if (!raw) { - return ""; - } - - if (/^[0-9a-f]{24}$/i.test(raw)) { - return raw; - } - - if (!/^https?:\/\//i.test(raw)) { - return ""; - } - - let parsedUrl; - try { - parsedUrl = new URL(raw); - } catch (error) { - return ""; - } - - const queryCandidates = ["id", "user_id", "userId", "bloggerId", "creatorId"]; - for (const key of queryCandidates) { - const queryValue = parsedUrl.searchParams.get(key); - if (queryValue && /^[0-9a-f]{24}$/i.test(queryValue)) { - return queryValue; - } - } - - const segments = parsedUrl.pathname - .split("/") - .map((segment) => segment.trim()) - .filter(Boolean) - .reverse(); - - for (const segment of segments) { - if (/^[0-9a-f]{24}$/i.test(segment)) { - return segment; - } - } - - return ""; -} - -function parseCreatorInputs(rawInput) { - const values = normalizeScalar(rawInput) - .split(/[\n,,\s]+/) - .map((item) => item.trim()) - .filter(Boolean); - - const ids = []; - const seen = new Set(); - - for (const value of values) { - const id = extractBloggerId(value); - if (!id || seen.has(id)) { - continue; - } - seen.add(id); - ids.push(id); - } - - return ids; -} - -function buildFieldOptions(records) { - const fieldMap = new Map(); - - for (const record of records) { - const flattened = record.flattened || {}; - for (const path of Object.keys(flattened)) { - if (!FIELD_LABEL_MAP[path]) { - continue; - } - if (!fieldMap.has(path)) { - fieldMap.set(path, { - path, - label: getFieldLabel(path), - }); - } - } - } - - return Array.from(fieldMap.values()).sort((left, right) => - left.path.localeCompare(right.path, "zh-CN"), - ); -} - -function getFieldLabel(path) { - if (FIELD_LABEL_MAP[path]) { - return FIELD_LABEL_MAP[path]; - } - - for (const [namespace, namespaceLabel] of Object.entries(NAMESPACE_LABEL_MAP)) { - if (path === namespace) { - return namespaceLabel; - } - if (path.startsWith(`${namespace}.`)) { - return `${namespaceLabel} - ${path.slice(namespace.length + 1)}`; - } - } - - return FIELD_LABEL_MAP[path] || path; -} - -function pickDefaultFields(fieldOptions) { - return fieldOptions.slice(0, 12).map((field) => field.path); -} - -function buildExportRows(records, selectedFields) { - return records.map((record) => { - const row = {}; - for (const field of selectedFields) { - row[field] = record.flattened[field] || ""; - } - return row; - }); -} - -function escapeXml(value) { - return String(value) - .replace(/&/g, "&") - .replace(//g, ">") - .replace(/"/g, """) - .replace(/'/g, "'"); -} - -function sanitizeSheetName(value) { - const name = normalizeScalar(value) || "Sheet1"; - return name.replace(/[\\/?*:[\]]/g, "_").slice(0, 31) || "Sheet1"; -} - -function buildSpreadsheetXml(config) { - const sheetName = sanitizeSheetName(config.sheetName || "达人数据"); - const columns = Array.isArray(config.columns) ? config.columns : []; - const headers = - Array.isArray(config.headers) && config.headers.length === columns.length - ? config.headers - : columns; - const rows = Array.isArray(config.rows) ? config.rows : []; - const headerCells = columns - .map( - (column, index) => - `${escapeXml(headers[index] ?? column)}`, - ) - .join(""); - - const dataRows = rows - .map((row) => { - const cells = columns - .map((column) => { - const value = row[column] === undefined ? "" : row[column]; - return `${escapeXml(value)}`; - }) - .join(""); - return `${cells}`; - }) - .join(""); - - return ` - - - - - ${headerCells} - ${dataRows} -
-
-
`; -} - -function escapeCsvValue(value) { - const text = normalizeScalar(value); - if (/["\n,\r]/.test(text)) { - return `"${text.replace(/"/g, '""')}"`; - } - return text; -} - -function buildCsvContent(config) { - const columns = Array.isArray(config.columns) ? config.columns : []; - const headers = - Array.isArray(config.headers) && config.headers.length === columns.length - ? config.headers - : columns; - const rows = Array.isArray(config.rows) ? config.rows : []; - const headerLine = headers.map(escapeCsvValue).join(","); - const bodyLines = rows.map((row) => - columns - .map((column) => escapeCsvValue(row[column] === undefined ? "" : row[column])) - .join(","), - ); - - return `\uFEFF${[headerLine, ...bodyLines].join("\r\n")}`; -} - -function buildXlsxContent(config) { - // Lazy require so the rest of the module stays usable without deps (e.g. pure parsing tests). - // In this repo we install it via package.json. - // eslint-disable-next-line global-require, import/no-extraneous-dependencies - const XLSX = require("xlsx"); - - const sheetName = sanitizeSheetName(config.sheetName || "达人数据"); - const columns = Array.isArray(config.columns) ? config.columns : []; - const headers = - Array.isArray(config.headers) && config.headers.length === columns.length - ? config.headers - : columns; - const rows = Array.isArray(config.rows) ? config.rows : []; - - const aoa = [headers.slice()]; - for (const row of rows) { - aoa.push( - columns.map((column) => { - const value = row[column] === undefined ? "" : row[column]; - return normalizeScalar(value); - }), - ); - } - - const ws = XLSX.utils.aoa_to_sheet(aoa); - const wb = XLSX.utils.book_new(); - XLSX.utils.book_append_sheet(wb, ws, sheetName); - return XLSX.write(wb, { bookType: "xlsx", type: "buffer" }); -} - -function formatTimestamp(date) { - const safeDate = date instanceof Date ? date : new Date(); - const parts = [ - safeDate.getFullYear(), - String(safeDate.getMonth() + 1).padStart(2, "0"), - String(safeDate.getDate()).padStart(2, "0"), - "-", - String(safeDate.getHours()).padStart(2, "0"), - String(safeDate.getMinutes()).padStart(2, "0"), - String(safeDate.getSeconds()).padStart(2, "0"), - ]; - return parts.join(""); -} - -function unwrapResponsePayload(json) { - if (isPlainObject(json?.data)) { - return json.data; - } - if (isPlainObject(json?.result)) { - return json.result; - } - if (isPlainObject(json)) { - return json; - } - return { value: json }; -} - -async function fetchBloggerRecord(id, fetchImpl) { - if (typeof fetchImpl !== "function") { - throw new Error("当前环境不支持 fetch,无法请求达人数据。"); - } - - const response = await fetchImpl(`${API_BASE}${encodeURIComponent(id)}`, { - method: "GET", - credentials: "include", - headers: { - accept: "application/json, text/plain, */*", - }, - }); - - if (!response || !response.ok) { - const status = response ? response.status : "unknown"; - throw new Error(`请求达人 ${id} 失败,状态码:${status}`); - } - - const json = await response.json(); - const payload = unwrapResponsePayload(json); - if (!Object.prototype.hasOwnProperty.call(payload, "id")) { - payload.id = id; - } - return payload; -} - -async function fetchSupplementalPayload(userId, fetchImpl, config) { - const response = await fetchImpl(config.buildUrl(userId), { - method: "GET", - credentials: "include", - headers: { - accept: "application/json, text/plain, */*", - }, - }); - - if (!response || !response.ok) { - const status = response ? response.status : "unknown"; - throw new Error( - `请求补充数据 ${config.namespace} 失败,userId=${userId},状态码:${status}`, - ); - } - - const json = await response.json(); - return unwrapResponsePayload(json); -} - -async function fetchMergedBloggerRecord(id, fetchImpl) { - const primaryPayload = await fetchBloggerRecord(id, fetchImpl); - const userId = primaryPayload.userId || primaryPayload.id || id; - - const settledPayloads = await Promise.allSettled( - SUPPLEMENTAL_ENDPOINTS.map((config) => - fetchSupplementalPayload(userId, fetchImpl, config).then((payload) => ({ - namespace: config.namespace, - payload, - })), - ), - ); - - const mergedPayload = { - ...primaryPayload, - }; - - for (const result of settledPayloads) { - if (result.status !== "fulfilled") { - continue; - } - mergedPayload[result.value.namespace] = result.value.payload; - } - - return mergedPayload; -} - -async function mapWithConcurrency(items, limit, mapper) { - const list = Array.isArray(items) ? items : []; - if (!list.length) { - return []; - } - const size = Math.max(1, Number(limit) || 1); - const workerCount = Math.min(size, list.length); - const results = new Array(list.length); - let nextIndex = 0; - - const worker = async () => { - while (true) { - const index = nextIndex; - nextIndex += 1; - if (index >= list.length) { - return; - } - results[index] = await mapper(list[index], index); - } - }; - - const workers = Array.from({ length: workerCount }, () => worker()); - await Promise.all(workers); - return results; -} - -function createExportController(options) { - const settings = options || {}; - const now = settings.now || (() => new Date()); - const fetchImpl = settings.fetchImpl; - const concurrency = Math.max(1, Number(settings.concurrency) || 4); - let cachedRecords = []; - let cachedFields = []; - - return { - async preview(rawInput) { - const ids = parseCreatorInputs(rawInput); - if (!ids.length) { - throw new Error("请输入至少一个有效的达人主页链接或达人 ID。"); - } - - const records = await mapWithConcurrency(ids, concurrency, async (id) => { - const raw = await fetchMergedBloggerRecord(id, fetchImpl); - return { - id, - raw, - flattened: flattenRecord(raw), - }; - }); - - cachedRecords = records; - cachedFields = buildFieldOptions(records); - - return { - ids, - records, - fields: cachedFields, - selectedFields: pickDefaultFields(cachedFields), - }; - }, - - exportSheet(selectedFields) { - if (!cachedRecords.length) { - throw new Error("请先读取字段并确认达人数据。"); - } - - const fields = - Array.isArray(selectedFields) && selectedFields.length - ? selectedFields - : cachedFields.map((field) => field.path); - - const rows = buildExportRows(cachedRecords, fields); - const headers = fields.map((field) => getFieldLabel(field)); - const content = buildXlsxContent({ - columns: fields, - headers, - rows, - sheetName: "达人数据", - }); - - return { - filename: `xhs-bloggers-${formatTimestamp(now())}.xlsx`, - columns: fields, - headers, - rows, - content, - }; - }, - - getState() { - return { - records: cachedRecords.slice(), - fields: cachedFields.slice(), - }; - }, - }; -} - -module.exports = { - API_BASE, - SUPPLEMENTAL_ENDPOINTS, - buildExportRows, - buildCsvContent, - buildFieldOptions, - buildSpreadsheetXml, - buildXlsxContent, - createExportController, - extractBloggerId, - fetchMergedBloggerRecord, - flattenRecord, - getFieldLabel, - parseCreatorInputs, -}; diff --git a/pugongying/test/userscript.test.js b/pugongying/test/userscript.test.js deleted file mode 100644 index 9302583..0000000 --- a/pugongying/test/userscript.test.js +++ /dev/null @@ -1,529 +0,0 @@ -const test = require("node:test"); -const assert = require("node:assert/strict"); - -const { - buildExportRows, - buildCsvContent, - buildFieldOptions, - buildSpreadsheetXml, - createExportController, - flattenRecord, - getFieldLabel, - parseCreatorInputs, -} = require("../src/xhs-pgy-export-core.js"); - -test("parseCreatorInputs supports ids, homepage links, dedupe and ignores junk", () => { - const inputs = ` - 5776652682ec3912d6f508d5 - https://www.xiaohongshu.com/user/profile/5776652682ec3912d6f508d5 - https://pgy.xiaohongshu.com/api/solar/cooperator/user/blogger/5f1234567890abcdef123456 - not-a-valid-id - `; - - assert.deepEqual(parseCreatorInputs(inputs), [ - "5776652682ec3912d6f508d5", - "5f1234567890abcdef123456", - ]); -}); - -test("flattenRecord expands nested objects and normalizes arrays", () => { - const flattened = flattenRecord({ - id: "abc", - profile: { - nickname: "达人A", - tags: ["美妆", "护肤"], - }, - prices: [ - { title: "图文", amount: 600 }, - { title: "视频", amount: 1200 }, - ], - empty: null, - }); - - assert.equal(flattened.id, "abc"); - assert.equal(flattened["profile.nickname"], "达人A"); - assert.equal(flattened["profile.tags"], "美妆 | 护肤"); - assert.match(flattened.prices, /图文/); - assert.equal(flattened.empty, ""); -}); - -test("buildFieldOptions and buildExportRows merge fields across records", () => { - const records = [ - { - raw: { id: "1", name: "达人A", metrics: { fans: 1000 } }, - flattened: flattenRecord({ id: "1", name: "达人A", metrics: { fans: 1000 } }), - }, - { - raw: { id: "2", name: "达人B", contact: { wechat: "abc123" } }, - flattened: flattenRecord({ id: "2", name: "达人B", contact: { wechat: "abc123" } }), - }, - ]; - - const fields = buildFieldOptions(records); - assert.deepEqual( - fields.map((field) => field.path), - ["id", "metrics.fans", "name"], - ); - assert.deepEqual( - fields.map((field) => field.label), - ["ID", "粉丝数", "达人昵称"], - ); - assert.deepEqual( - fields.map((field) => Object.keys(field).sort()), - [ - ["label", "path"], - ["label", "path"], - ["label", "path"], - ], - ); - - const rows = buildExportRows(records, ["id", "name", "contact.wechat"]); - assert.deepEqual(rows, [ - { id: "1", name: "达人A", "contact.wechat": "" }, - { id: "2", name: "达人B", "contact.wechat": "abc123" }, - ]); -}); - -test("getFieldLabel maps known creator fields and falls back for unknown fields", () => { - assert.equal(getFieldLabel("userId"), "达人ID"); - assert.equal(getFieldLabel("name"), "达人昵称"); - assert.equal(getFieldLabel("fansCount"), "粉丝数"); - assert.equal(getFieldLabel("contentTags"), "内容标签"); - assert.equal(getFieldLabel("clothingIndustryPrice.picturePrice"), "服饰行业图文报价"); - assert.equal(getFieldLabel("dataSummary"), "数据概览"); - assert.equal(getFieldLabel("dataSummary.fans30GrowthRate"), "近30天涨粉率"); - assert.equal(getFieldLabel("dataSummary.mAccumImpCompare"), "曝光中位数超越率"); - assert.equal(getFieldLabel("dataSummary.noteType"), "笔记内容类型"); - assert.equal(getFieldLabel("dataSummary.activeDayInLast7"), "近7天活跃天数"); - assert.equal(getFieldLabel("dataSummary.responseRate"), "响应率"); - assert.equal(getFieldLabel("fansSummary.fansNum"), "粉丝总数"); - assert.equal(getFieldLabel("fansSummary.fansIncreaseNum"), "涨粉数"); - assert.equal(getFieldLabel("fansSummary.fansGrowthRate"), "粉丝增长率"); - assert.equal(getFieldLabel("fansSummary.fansGrowthBeyondRate"), "粉丝增长超越率"); - assert.equal(getFieldLabel("fansSummary.activeFansL28"), "近28天活跃粉丝数"); - assert.equal(getFieldLabel("fansSummary.activeFansRate"), "活跃粉丝占比"); - assert.equal(getFieldLabel("fansSummary.activeFansBeyondRate"), "活跃粉丝超越率"); - assert.equal(getFieldLabel("fansSummary.engageFansRate"), "互动粉丝占比"); - assert.equal(getFieldLabel("fansSummary.engageFansL30"), "近30天互动粉丝数"); - assert.equal(getFieldLabel("fansSummary.engageFansBeyondRate"), "互动粉丝超越率"); - assert.equal(getFieldLabel("fansSummary.readFansIn30"), "近30天阅读粉丝数"); - assert.equal(getFieldLabel("fansSummary.readFansRate"), "阅读粉丝占比"); - assert.equal(getFieldLabel("fansSummary.readFansBeyondRate"), "阅读粉丝超越率"); - assert.equal(getFieldLabel("fansSummary.payFansUserRate30d"), "近30天支付粉丝占比"); - assert.equal(getFieldLabel("fansSummary.payFansUserNum30d"), "近30天支付粉丝数"); - assert.equal(getFieldLabel("fansSummary.cityDistribution"), "粉丝概览 - cityDistribution"); - assert.equal(getFieldLabel("fansProfile.ages"), "粉丝年龄分布"); - assert.equal(getFieldLabel("fansProfile.gender.male"), "粉丝男性占比"); - assert.equal(getFieldLabel("fansProfile.gender.female"), "粉丝女性占比"); - assert.equal(getFieldLabel("fansProfile.interests"), "粉丝兴趣分布"); - assert.equal(getFieldLabel("fansProfile.provinces"), "粉丝省份分布"); - assert.equal(getFieldLabel("fansProfile.cities"), "粉丝城市分布"); - assert.equal(getFieldLabel("fansProfile.devices"), "粉丝设备分布"); - assert.equal(getFieldLabel("fansProfile.dateKey"), "画像日期"); - assert.equal(getFieldLabel("metrics.customScore"), "metrics.customScore"); -}); - -test("buildSpreadsheetXml escapes xml-sensitive characters", () => { - const xml = buildSpreadsheetXml({ - columns: ["id", "name"], - rows: [{ id: "1", name: "A & B <达人>" }], - sheetName: "达人数据", - }); - - assert.match(xml, /A & B <达人>/); - assert.match(xml, //); -}); - -test("buildCsvContent adds BOM and escapes commas, quotes and newlines", () => { - const csv = buildCsvContent({ - columns: ["id", "name"], - headers: ["达人ID", "达人昵称"], - rows: [{ id: "1", name: 'A,"B"\n达人' }], - }); - - assert.equal(csv.charCodeAt(0), 0xfeff); - assert.match(csv, /达人ID,达人昵称/); - assert.match(csv, /1,"A,""B""\n达人"/); -}); - -test("createExportController previews and exports creator data", async () => { - const seenRequests = []; - const controller = createExportController({ - fetchImpl: async (url, options) => { - seenRequests.push({ url, options }); - if (!url.includes("/api/solar/cooperator/user/blogger/")) { - return { - ok: false, - status: 404, - json: async () => ({}), - }; - } - - const id = url.split("/").pop(); - return { - ok: true, - json: async () => ({ - success: true, - data: { - id, - name: `达人-${id.slice(-4)}`, - metrics: { - fans: Number(id.slice(-2)), - }, - }, - }), - }; - }, - now: () => new Date("2026-03-12T08:09:10Z"), - }); - - const preview = await controller.preview(` - 5776652682ec3912d6f508d5 - 5f1234567890abcdef123456 - `); - - assert.equal(seenRequests.length, 8); - assert.equal(preview.records.length, 2); - assert.deepEqual(preview.selectedFields, ["id", "metrics.fans", "name"]); - assert.equal(preview.fields.find((field) => field.path === "name").label, "达人昵称"); - assert.deepEqual( - preview.fields.map((field) => Object.keys(field).sort()), - preview.fields.map(() => ["label", "path"]), - ); - - const exported = controller.exportSheet(["id", "name", "metrics.fans"]); - assert.equal(exported.filename, "xhs-bloggers-20260312-160910.xlsx"); - assert.equal(exported.rows.length, 2); - assert.deepEqual(exported.headers, ["ID", "达人昵称", "粉丝数"]); - assert.ok(Buffer.isBuffer(exported.content)); - assert.equal(exported.content[0], 0x50); // P - assert.equal(exported.content[1], 0x4b); // K -}); - -test("createExportController merges supplemental endpoint payloads into namespaced fields", async () => { - const seenUrls = []; - const controller = createExportController({ - fetchImpl: async (url) => { - seenUrls.push(url); - if (url.includes("/api/solar/cooperator/user/blogger/")) { - return { - ok: true, - json: async () => ({ - data: { - id: "61f27a60000000001000cb5f", - userId: "61f27a60000000001000cb5f", - name: "测试达人", - }, - }), - }; - } - - if (url.includes("/api/pgy/kol/data/data_summary")) { - return { - ok: true, - json: async () => ({ - data: { - avgRead: 1200, - avgInteract: 98, - }, - }), - }; - } - - if (url.includes("/api/solar/kol/data_v3/fans_summary")) { - return { - ok: true, - json: async () => ({ - data: { - cityDistribution: ["上海", "杭州"], - maleRate: 0.22, - }, - }), - }; - } - - if (url.includes("/api/solar/kol/data/61f27a60000000001000cb5f/fans_profile")) { - return { - ok: true, - json: async () => ({ - data: { - age18_24: 0.31, - age25_34: 0.44, - }, - }), - }; - } - - throw new Error(`unexpected url: ${url}`); - }, - }); - - const preview = await controller.preview("61f27a60000000001000cb5f"); - - assert.equal(seenUrls.length, 4); - assert.ok( - seenUrls.some((url) => - url.includes("/api/pgy/kol/data/data_summary?userId=61f27a60000000001000cb5f&business=1"), - ), - ); - assert.ok( - seenUrls.some((url) => - url.includes("/api/solar/kol/data_v3/fans_summary?userId=61f27a60000000001000cb5f"), - ), - ); - assert.ok( - seenUrls.some((url) => - url.includes("/api/solar/kol/data/61f27a60000000001000cb5f/fans_profile"), - ), - ); - assert.equal(preview.records[0].raw.dataSummary.avgRead, 1200); - assert.equal(preview.records[0].raw.fansSummary.maleRate, 0.22); - assert.equal(preview.records[0].raw.fansProfile.age18_24, 0.31); - assert.equal(preview.records[0].flattened["dataSummary.avgRead"], "1200"); - assert.equal(preview.records[0].flattened["fansSummary.cityDistribution"], "上海 | 杭州"); - assert.equal(preview.fields.find((field) => field.path === "dataSummary.avgRead").label, "平均阅读量"); - assert.equal( - preview.fields.find((field) => field.path === "fansProfile.age18_24"), - undefined, - ); -}); - -test("createExportController applies mapped Chinese headers for provided supplemental sample fields", async () => { - const controller = createExportController({ - fetchImpl: async (url) => { - if (url.includes("/api/solar/cooperator/user/blogger/")) { - return { - ok: true, - json: async () => ({ - data: { - id: "61f27a60000000001000cb5f", - userId: "61f27a60000000001000cb5f", - name: "李欢喜", - }, - }), - }; - } - - if (url.includes("/api/pgy/kol/data/data_summary")) { - return { - ok: true, - json: async () => ({ - data: { - fans30GrowthRate: "4.7", - activeDayInLast7: 7, - noteType: [{ contentTag: "母婴", percent: "100.0" }], - responseRate: "95.6", - }, - }), - }; - } - - if (url.includes("/api/solar/kol/data_v3/fans_summary")) { - return { - ok: true, - json: async () => ({ - data: {}, - }), - }; - } - - if (url.includes("/fans_profile")) { - return { - ok: true, - json: async () => ({ - data: { - ages: [{ group: "25-34", percent: 0.67 }], - gender: { male: 0.12, female: 0.88 }, - interests: [{ name: "母婴", percent: 0.17 }], - provinces: [{ name: "广东", percent: 0.14 }], - cities: [{ name: "广州", percent: 0.03 }], - devices: [{ name: "apple inc.", desc: "苹果", percent: 0.28 }], - dateKey: "2026-03-11", - }, - }), - }; - } - - throw new Error(`unexpected url: ${url}`); - }, - }); - - await controller.preview("61f27a60000000001000cb5f"); - const exported = controller.exportSheet([ - "name", - "dataSummary.fans30GrowthRate", - "dataSummary.activeDayInLast7", - "dataSummary.noteType", - "dataSummary.responseRate", - "fansProfile.ages", - "fansProfile.gender.male", - "fansProfile.gender.female", - "fansProfile.interests", - "fansProfile.provinces", - "fansProfile.cities", - "fansProfile.devices", - "fansProfile.dateKey", - ]); - - assert.deepEqual(exported.headers, [ - "达人昵称", - "近30天涨粉率", - "近7天活跃天数", - "笔记内容类型", - "响应率", - "粉丝年龄分布", - "粉丝男性占比", - "粉丝女性占比", - "粉丝兴趣分布", - "粉丝省份分布", - "粉丝城市分布", - "粉丝设备分布", - "画像日期", - ]); -}); - -test("createExportController applies mapped Chinese headers for provided fansSummary fields", async () => { - const controller = createExportController({ - fetchImpl: async (url) => { - if (url.includes("/api/solar/cooperator/user/blogger/")) { - return { - ok: true, - json: async () => ({ - data: { - id: "61f27a60000000001000cb5f", - userId: "61f27a60000000001000cb5f", - name: "李欢喜", - }, - }), - }; - } - - if (url.includes("/api/pgy/kol/data/data_summary")) { - return { ok: true, json: async () => ({ data: {} }) }; - } - - if (url.includes("/api/solar/kol/data_v3/fans_summary")) { - return { - ok: true, - json: async () => ({ - data: { - fansNum: 11824, - fansIncreaseNum: 534, - fansGrowthRate: "4.7", - fansGrowthBeyondRate: "90.3", - activeFansL28: 4329, - activeFansRate: "36.6", - activeFansBeyondRate: "31.4", - engageFansRate: "8.0", - engageFansL30: 946, - engageFansBeyondRate: "97.6", - readFansIn30: 1343, - readFansRate: "11.4", - readFansBeyondRate: "89.0", - payFansUserRate30d: "2.7", - payFansUserNum30d: 320, - }, - }), - }; - } - - if (url.includes("/fans_profile")) { - return { ok: true, json: async () => ({ data: {} }) }; - } - - throw new Error(`unexpected url: ${url}`); - }, - }); - - await controller.preview("61f27a60000000001000cb5f"); - const exported = controller.exportSheet([ - "fansSummary.fansNum", - "fansSummary.fansIncreaseNum", - "fansSummary.fansGrowthRate", - "fansSummary.fansGrowthBeyondRate", - "fansSummary.activeFansL28", - "fansSummary.activeFansRate", - "fansSummary.activeFansBeyondRate", - "fansSummary.engageFansRate", - "fansSummary.engageFansL30", - "fansSummary.engageFansBeyondRate", - "fansSummary.readFansIn30", - "fansSummary.readFansRate", - "fansSummary.readFansBeyondRate", - "fansSummary.payFansUserRate30d", - "fansSummary.payFansUserNum30d", - ]); - - assert.deepEqual(exported.headers, [ - "粉丝总数", - "涨粉数", - "粉丝增长率", - "粉丝增长超越率", - "近28天活跃粉丝数", - "活跃粉丝占比", - "活跃粉丝超越率", - "互动粉丝占比", - "近30天互动粉丝数", - "互动粉丝超越率", - "近30天阅读粉丝数", - "阅读粉丝占比", - "阅读粉丝超越率", - "近30天支付粉丝占比", - "近30天支付粉丝数", - ]); -}); - -test("createExportController tolerates supplemental endpoint failures", async () => { - const controller = createExportController({ - fetchImpl: async (url) => { - if (url.includes("/api/solar/cooperator/user/blogger/")) { - return { - ok: true, - json: async () => ({ - data: { - id: "61f27a60000000001000cb5f", - userId: "61f27a60000000001000cb5f", - name: "测试达人", - }, - }), - }; - } - - return { - ok: false, - status: 500, - json: async () => ({}), - }; - }, - }); - - const preview = await controller.preview("61f27a60000000001000cb5f"); - assert.equal(preview.records[0].raw.name, "测试达人"); - assert.equal(preview.records[0].raw.dataSummary, undefined); - assert.equal(preview.records[0].flattened.name, "测试达人"); -}); - -test("createExportController rejects empty input and request failures", async () => { - const controller = createExportController({ - fetchImpl: async () => ({ - ok: false, - status: 403, - json: async () => ({}), - }), - }); - - await assert.rejects( - controller.preview(""), - /请输入至少一个有效的达人主页链接或达人 ID/, - ); - - await assert.rejects( - controller.preview("5776652682ec3912d6f508d5"), - /请求达人 5776652682ec3912d6f508d5 失败,状态码:403/, - ); - - assert.throws( - () => controller.exportSheet(["id"]), - /请先读取字段并确认达人数据/, - ); -}); diff --git a/pugongying/xhs-pgy-export.user.js b/pugongying/xhs-pgy-export.user.js index 4885b48..14ed0d7 100644 --- a/pugongying/xhs-pgy-export.user.js +++ b/pugongying/xhs-pgy-export.user.js @@ -17,20 +17,6 @@ const API_BASE = "https://pgy.xiaohongshu.com/api/solar/cooperator/user/blogger/"; const SUPPLEMENTAL_ENDPOINTS = [ - { - namespace: "dataSummary", - buildUrl: (userId) => - `https://pgy.xiaohongshu.com/api/pgy/kol/data/data_summary?userId=${encodeURIComponent( - userId, - )}&business=1`, - }, - { - namespace: "fansSummary", - buildUrl: (userId) => - `https://pgy.xiaohongshu.com/api/solar/kol/data_v3/fans_summary?userId=${encodeURIComponent( - userId, - )}`, - }, { namespace: "fansProfile", buildUrl: (userId) => @@ -40,28 +26,14 @@ }, ]; const NAMESPACE_LABEL_MAP = { - dataSummary: "数据概览", - fansSummary: "粉丝概览", fansProfile: "粉丝画像", }; const FIELD_LABEL_MAP = { id: "ID", - "metrics.fans": "粉丝数", - dataSummary: "数据概览", - fansSummary: "粉丝概览", fansProfile: "粉丝画像", - "dataSummary.fans30GrowthRate": "近30天粉丝量变化幅度", - "dataSummary.estimateVideoCpm": "预估视频CPM", - "dataSummary.estimatePictureCpm": "预估图文CPM", - "dataSummary.videoReadCost": "预估阅读单价(视频)", - "dataSummary.picReadCost": "预估阅读单价(图文)", - "dataSummary.mCpuvNum": "外溢进店中位数", "fansProfile.ages": "粉丝年龄分布", "fansProfile.gender.male": "粉丝男性占比", "fansProfile.gender.female": "粉丝女性占比", - "fansSummary.activeFansRate": "活跃粉丝占比", - "fansSummary.engageFansRate": "互动粉丝占比", - "fansSummary.readFansRate": "阅读粉丝占比", userId: "达人ID", name: "达人昵称", redId: "小红书号", @@ -71,7 +43,6 @@ fansCount: "粉丝数", likeCollectCountInfo: "获赞与收藏", businessNoteCount: "商业笔记数", - totalNoteCount: "总笔记数", picturePrice: "图文报价", videoPrice: "视频报价", lowerPrice: "最低报价", @@ -1241,7 +1212,6 @@ ${escapeXml(labelText)} -
映射字段:${escapeXml(field.path)}
`; list.appendChild(item); }