// Listing metadata: 596 lines, 16 KiB, JavaScript.
/**
 * Base URL of the primary blogger endpoint.
 * The URL-encoded blogger ID is appended directly to this string, so the
 * trailing slash is significant.
 */
const API_BASE =
  "https://pgy.xiaohongshu.com/api/solar/cooperator/user/blogger/";

/**
 * Supplemental endpoints requested for each blogger.
 *
 * - `namespace`: key under which the endpoint's payload is stored.
 * - `buildUrl(userId)`: returns the request URL; the user ID is URL-encoded.
 */
const SUPPLEMENTAL_ENDPOINTS = [
  {
    namespace: "dataSummary",
    buildUrl: (userId) =>
      `https://pgy.xiaohongshu.com/api/pgy/kol/data/data_summary?userId=${encodeURIComponent(
        userId,
      )}&business=1`,
  },
  {
    namespace: "fansSummary",
    buildUrl: (userId) =>
      `https://pgy.xiaohongshu.com/api/solar/kol/data_v3/fans_summary?userId=${encodeURIComponent(
        userId,
      )}`,
  },
  {
    namespace: "fansProfile",
    buildUrl: (userId) =>
      `https://pgy.xiaohongshu.com/api/solar/kol/data/${encodeURIComponent(
        userId,
      )}/fans_profile`,
  },
];

/**
 * Display label for each supplemental namespace. Used as the label prefix for
 * namespaced field paths that have no dedicated entry of their own.
 */
const NAMESPACE_LABEL_MAP = {
  dataSummary: "数据概览",
  fansSummary: "粉丝概览",
  fansProfile: "粉丝画像",
};

/**
 * Display label for each known flattened field path (dot-separated).
 * Only paths listed here are offered as selectable export fields.
 */
const FIELD_LABEL_MAP = {
  id: "ID",
  "metrics.fans": "粉丝数",
  dataSummary: "数据概览",
  fansSummary: "粉丝概览",
  fansProfile: "粉丝画像",
  "dataSummary.fans30GrowthRate": "近30天涨粉率",
  "dataSummary.mAccumImpCompare": "曝光中位数超越率",
  "dataSummary.noteType": "笔记内容类型",
  "dataSummary.activeDayInLast7": "近7天活跃天数",
  "dataSummary.responseRate": "响应率",
  "dataSummary.avgRead": "平均阅读量",
  "fansProfile.ages": "粉丝年龄分布",
  "fansProfile.gender.male": "粉丝男性占比",
  "fansProfile.gender.female": "粉丝女性占比",
  "fansProfile.interests": "粉丝兴趣分布",
  "fansProfile.provinces": "粉丝省份分布",
  "fansProfile.cities": "粉丝城市分布",
  "fansProfile.devices": "粉丝设备分布",
  "fansProfile.dateKey": "画像日期",
  "fansSummary.fansNum": "粉丝总数",
  "fansSummary.fansIncreaseNum": "涨粉数",
  "fansSummary.fansGrowthRate": "粉丝增长率",
  "fansSummary.fansGrowthBeyondRate": "粉丝增长超越率",
  "fansSummary.activeFansL28": "近28天活跃粉丝数",
  "fansSummary.activeFansRate": "活跃粉丝占比",
  "fansSummary.activeFansBeyondRate": "活跃粉丝超越率",
  "fansSummary.engageFansRate": "互动粉丝占比",
  "fansSummary.engageFansL30": "近30天互动粉丝数",
  "fansSummary.engageFansBeyondRate": "互动粉丝超越率",
  "fansSummary.readFansIn30": "近30天阅读粉丝数",
  "fansSummary.readFansRate": "阅读粉丝占比",
  "fansSummary.readFansBeyondRate": "阅读粉丝超越率",
  "fansSummary.payFansUserRate30d": "近30天支付粉丝占比",
  "fansSummary.payFansUserNum30d": "近30天支付粉丝数",
  userId: "达人ID",
  fansCount: "粉丝数",
  name: "达人昵称",
  redId: "小红书号",
  location: "地区",
  travelAreaList: "常驻地区",
  personalTags: "人设标签",
  contentTags: "内容标签",
  likeCollectCountInfo: "获赞与收藏",
  businessNoteCount: "商业笔记数",
  totalNoteCount: "总笔记数",
  picturePrice: "图文报价",
  videoPrice: "视频报价",
  lowerPrice: "最低报价",
  userType: "用户类型",
  tradeType: "合作行业",
  clickMidNum: "阅读中位数",
  accumCoopImpMedinNum30d: "近30天合作曝光中位数",
  mEngagementNum: "互动中位数",
  "clothingIndustryPrice.picturePrice": "服饰行业图文报价",
};

/**
 * Reports whether `value` is tagged "[object Object]" by
 * Object.prototype.toString. Arrays, null, Dates and primitives are rejected;
 * note that ordinary class instances carry the same tag and are accepted.
 *
 * @param {*} value - Value to inspect.
 * @returns {boolean} True when the value's tag is "[object Object]".
 */
function isPlainObject(value) {
  return Object.prototype.toString.call(value) === "[object Object]";
}

/**
 * Converts a value to its string form for display and export.
 *
 * - null / undefined become "".
 * - Strings are trimmed.
 * - Valid Dates become ISO-8601 strings; an invalid Date becomes "" (calling
 *   toISOString() on it would throw a RangeError).
 * - Everything else (numbers, booleans, bigints, other objects) goes through
 *   String().
 *
 * @param {*} value - Value to normalize.
 * @returns {string} The normalized text.
 */
function normalizeScalar(value) {
  if (value === null || value === undefined) {
    return "";
  }
  if (typeof value === "string") {
    return value.trim();
  }
  if (value instanceof Date) {
    return Number.isNaN(value.getTime()) ? "" : value.toISOString();
  }
  return String(value);
}

/**
 * Renders an array as a single " | "-separated string.
 *
 * Plain objects and nested arrays are serialized with JSON.stringify (falling
 * back to String() if serialization throws); every other item goes through
 * normalizeScalar. Items that render to an empty string are dropped.
 *
 * @param {*} list - Candidate array.
 * @returns {string} The joined text, or "" for a non-array or empty array.
 */
function summarizeArray(list) {
  if (!Array.isArray(list) || list.length === 0) {
    return "";
  }

  const renderItem = (item) => {
    if (!isPlainObject(item) && !Array.isArray(item)) {
      return normalizeScalar(item);
    }
    try {
      return JSON.stringify(item);
    } catch {
      // JSON.stringify throws on e.g. cycles or BigInt members.
      return String(item);
    }
  };

  return list
    .map((item) => renderItem(item))
    .filter(Boolean)
    .join(" | ");
}

/**
 * Flattens a nested record into a single-level object keyed by dot-separated
 * paths, with every leaf rendered as a string.
 *
 * - Nested plain objects are recursed into.
 * - Arrays are collapsed with summarizeArray.
 * - Other values go through normalizeScalar.
 * - An empty nested object yields "" at its own path.
 *
 * @param {*} record - Value to flatten.
 * @param {string} [prefix] - Path prefix for the keys of `record`.
 * @param {Object} [target] - Object to write into; a new one is created when omitted.
 * @returns {Object} `target` (or the newly created object) with the flattened entries.
 */
function flattenRecord(record, prefix, target) {
  const output = target || {};
  const basePath = prefix || "";

  if (!isPlainObject(record)) {
    // A non-object can only be stored when there is a path to store it under.
    if (basePath) {
      output[basePath] = normalizeScalar(record);
    }
    return output;
  }

  const entries = Object.entries(record);
  if (entries.length === 0 && basePath) {
    output[basePath] = "";
    return output;
  }

  for (const [key, value] of entries) {
    const path = basePath ? `${basePath}.${key}` : key;

    if (Array.isArray(value)) {
      output[path] = summarizeArray(value);
    } else if (isPlainObject(value)) {
      flattenRecord(value, path, output);
    } else {
      output[path] = normalizeScalar(value);
    }
  }

  return output;
}

/**
 * Extracts a blogger ID (24 hexadecimal characters, case-insensitive) from a
 * bare ID or from an http(s) URL.
 *
 * For URLs, the query parameters `id`, `user_id`, `userId`, `bloggerId` and
 * `creatorId` are checked in that order; if none holds an ID, path segments
 * are scanned from last to first.
 *
 * @param {*} value - Raw ID or URL.
 * @returns {string} The ID as written in the input, or "" when none is found.
 */
function extractBloggerId(value) {
  // No `g` flag, so repeated .test() calls do not carry state.
  const idPattern = /^[0-9a-f]{24}$/i;

  const text = normalizeScalar(value);
  if (!text) {
    return "";
  }
  if (idPattern.test(text)) {
    return text;
  }
  if (!/^https?:\/\//i.test(text)) {
    return "";
  }

  let url;
  try {
    url = new URL(text);
  } catch {
    return "";
  }

  const queryKeys = ["id", "user_id", "userId", "bloggerId", "creatorId"];
  for (const key of queryKeys) {
    const candidate = url.searchParams.get(key);
    if (candidate && idPattern.test(candidate)) {
      return candidate;
    }
  }

  const segmentsLastFirst = url.pathname
    .split("/")
    .map((segment) => segment.trim())
    .filter(Boolean)
    .reverse();

  return segmentsLastFirst.find((segment) => idPattern.test(segment)) ?? "";
}

/**
 * Splits free-form user input into unique blogger IDs, in first-seen order.
 *
 * Tokens are separated by any run of whitespace, ASCII commas or full-width
 * commas (U+FF0C). Tokens that extractBloggerId cannot resolve are skipped.
 *
 * @param {*} rawInput - Text containing IDs and/or profile URLs.
 * @returns {string[]} De-duplicated blogger IDs.
 */
function parseCreatorInputs(rawInput) {
  // \s already covers newlines; \uFF0C is written as an escape so the
  // full-width comma cannot be lost to character normalization.
  const tokens = normalizeScalar(rawInput).split(/[\s,\uFF0C]+/);

  const uniqueIds = new Set();
  for (const token of tokens) {
    const id = extractBloggerId(token);
    if (id) {
      uniqueIds.add(id);
    }
  }

  // A Set iterates in insertion order, so first-seen order is preserved.
  return [...uniqueIds];
}

/**
 * Collects the selectable export fields present in a set of records.
 *
 * A flattened path is offered only when FIELD_LABEL_MAP has its own entry for
 * it; inherited properties such as `constructor` are not treated as labels.
 *
 * @param {Array<{flattened?: Object}>} records - Records carrying a `flattened` map.
 * @returns {Array<{path: string, label: string}>} Unique options sorted by
 *   path using the "zh-CN" locale.
 */
function buildFieldOptions(records) {
  const hasLabel = (path) =>
    Object.prototype.hasOwnProperty.call(FIELD_LABEL_MAP, path) &&
    Boolean(FIELD_LABEL_MAP[path]);

  const options = new Map();
  for (const record of records) {
    for (const path of Object.keys(record?.flattened || {})) {
      if (hasLabel(path) && !options.has(path)) {
        options.set(path, { path, label: getFieldLabel(path) });
      }
    }
  }

  return [...options.values()].sort((left, right) =>
    left.path.localeCompare(right.path, "zh-CN"),
  );
}

/**
 * Resolves the display label for a flattened field path.
 *
 * Lookup order:
 * 1. An own entry in FIELD_LABEL_MAP.
 * 2. A namespace from NAMESPACE_LABEL_MAP: the namespace itself maps to its
 *    label, and `namespace.rest` maps to "<namespace label> - rest".
 * 3. The path itself.
 *
 * @param {string} path - Dot-separated field path.
 * @returns {string} The label to display.
 */
function getFieldLabel(path) {
  // Own-property check so paths like "constructor" do not resolve to
  // members inherited from Object.prototype.
  const known = Object.prototype.hasOwnProperty.call(FIELD_LABEL_MAP, path)
    ? FIELD_LABEL_MAP[path]
    : undefined;
  if (known) {
    return known;
  }

  for (const [namespace, namespaceLabel] of Object.entries(NAMESPACE_LABEL_MAP)) {
    if (path === namespace) {
      return namespaceLabel;
    }
    if (path.startsWith(`${namespace}.`)) {
      return `${namespaceLabel} - ${path.slice(namespace.length + 1)}`;
    }
  }

  return path;
}

/**
 * Picks the paths of the leading field options as the default selection.
 *
 * @param {Array<{path: string}>} fieldOptions - Available field options.
 * @param {number} [limit=12] - Maximum number of fields to pre-select.
 * @returns {string[]} Paths of the first `limit` options.
 */
function pickDefaultFields(fieldOptions, limit = 12) {
  return fieldOptions.slice(0, limit).map((field) => field.path);
}

/**
 * Projects each record's flattened values onto the selected fields.
 *
 * Missing or nullish values become ""; falsy but meaningful values such as 0
 * or false are kept. Only own properties of `flattened` are read, and a record
 * without a `flattened` map yields a row of empty strings.
 *
 * @param {Array<{flattened?: Object}>} records - Records to export.
 * @param {string[]} selectedFields - Field paths, in column order.
 * @returns {Object[]} One row object per record, keyed by field path.
 */
function buildExportRows(records, selectedFields) {
  return records.map((record) => {
    const flattened = record?.flattened ?? {};
    const row = {};
    for (const field of selectedFields) {
      const isOwn = Object.prototype.hasOwnProperty.call(flattened, field);
      row[field] = isOwn ? (flattened[field] ?? "") : "";
    }
    return row;
  });
}

// The five characters that XML predefines entities for.
const XML_ENTITY_BY_CHAR = {
  "&": "&amp;",
  "<": "&lt;",
  ">": "&gt;",
  '"': "&quot;",
  "'": "&apos;",
};

/**
 * Escapes a value for use in XML text or attribute content.
 *
 * The value is stringified with String() and each of & < > " ' is replaced by
 * its predefined entity. Replacement is a single pass, so the ampersands of
 * the inserted entities are never escaped a second time.
 *
 * @param {*} value - Value to escape.
 * @returns {string} The escaped text.
 */
function escapeXml(value) {
  return String(value).replace(/[&<>"']/g, (char) => XML_ENTITY_BY_CHAR[char]);
}

/**
 * Produces a worksheet name from arbitrary input.
 *
 * The characters \ / ? * : [ ] are replaced with "_" and the result is cut to
 * 31 characters. "Sheet1" is used when the input or the result is empty.
 *
 * @param {*} value - Requested sheet name.
 * @returns {string} The sanitized sheet name.
 */
function sanitizeSheetName(value) {
  const requested = normalizeScalar(value) || "Sheet1";
  const cleaned = requested.replace(/[\\/?*:[\]]/g, "_").slice(0, 31);
  return cleaned || "Sheet1";
}

/**
 * Builds a SpreadsheetML (Excel 2003 XML) workbook with one worksheet.
 *
 * Every cell is written as a String cell. Cell values go through
 * normalizeScalar, as in the CSV and XLSX builders, so null/undefined render
 * as "" rather than "null"/"undefined".
 *
 * @param {Object} config
 * @param {string} [config.sheetName] - Worksheet name; defaults to "达人数据".
 * @param {string[]} [config.columns] - Row keys, in column order.
 * @param {string[]} [config.headers] - Header texts; used only when the count
 *   matches `columns`, otherwise the column keys are the headers.
 * @param {Object[]} [config.rows] - Row objects keyed by column.
 * @returns {string} The workbook XML.
 */
function buildSpreadsheetXml(config) {
  const sheetName = sanitizeSheetName(config.sheetName || "达人数据");
  const columns = Array.isArray(config.columns) ? config.columns : [];
  const hasMatchingHeaders =
    Array.isArray(config.headers) && config.headers.length === columns.length;
  const headers = hasMatchingHeaders ? config.headers : columns;
  const rows = Array.isArray(config.rows) ? config.rows : [];

  const toCell = (text) =>
    `<Cell><Data ss:Type="String">${escapeXml(text)}</Data></Cell>`;

  const headerCells = columns
    .map((column, index) => toCell(headers[index] ?? column))
    .join("");

  const dataRows = rows
    .map((row) => {
      const cells = columns
        .map((column) => toCell(normalizeScalar(row?.[column])))
        .join("");
      return `<Row>${cells}</Row>`;
    })
    .join("");

  // The XML declaration must be the first thing in the document. Line breaks
  // between the namespace attributes serve as the required whitespace.
  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<?mso-application progid="Excel.Sheet"?>',
    '<Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet"',
    'xmlns:o="urn:schemas-microsoft-com:office:office"',
    'xmlns:x="urn:schemas-microsoft-com:office:excel"',
    'xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"',
    'xmlns:html="http://www.w3.org/TR/REC-html40">',
    `<Worksheet ss:Name="${escapeXml(sheetName)}">`,
    "<Table>",
    `<Row>${headerCells}</Row>`,
    dataRows,
    "</Table>",
    "</Worksheet>",
    "</Workbook>",
  ].join("\n");
}

/**
 * Renders one CSV field.
 *
 * The value is normalized to text; if it contains a double quote, comma, CR or
 * LF it is wrapped in double quotes with embedded quotes doubled.
 *
 * NOTE(review): values starting with "=", "+", "-" or "@" are emitted as-is;
 * spreadsheet applications may interpret them as formulas. Confirm whether
 * the exported data needs formula-injection hardening.
 *
 * @param {*} value - Field value.
 * @returns {string} The CSV-safe field text.
 */
function escapeCsvValue(value) {
  const text = normalizeScalar(value);
  const needsQuoting = /["\n,\r]/.test(text);
  return needsQuoting ? `"${text.replace(/"/g, '""')}"` : text;
}

/**
 * Builds CSV text: a header line followed by one line per row, joined with
 * CRLF and prefixed with a byte-order mark (U+FEFF).
 *
 * @param {Object} config
 * @param {string[]} [config.columns] - Row keys, in column order.
 * @param {string[]} [config.headers] - Header texts; used only when the count
 *   matches `columns`, otherwise the column keys are the headers.
 * @param {Object[]} [config.rows] - Row objects keyed by column.
 * @returns {string} The CSV document.
 */
function buildCsvContent(config) {
  const columns = Array.isArray(config.columns) ? config.columns : [];
  const hasMatchingHeaders =
    Array.isArray(config.headers) && config.headers.length === columns.length;
  const headers = hasMatchingHeaders ? config.headers : columns;
  const rows = Array.isArray(config.rows) ? config.rows : [];

  const toLine = (values) =>
    values.map((value) => escapeCsvValue(value)).join(",");

  const lines = [
    toLine(headers),
    // Missing cells are undefined here; escapeCsvValue renders them as "".
    ...rows.map((row) => toLine(columns.map((column) => row?.[column]))),
  ];

  return `\uFEFF${lines.join("\r\n")}`;
}

/**
 * Builds an .xlsx workbook with one worksheet using the `xlsx` package.
 *
 * @param {Object} config
 * @param {string} [config.sheetName] - Worksheet name; defaults to "达人数据".
 * @param {string[]} [config.columns] - Row keys, in column order.
 * @param {string[]} [config.headers] - Header texts; used only when the count
 *   matches `columns`, otherwise the column keys are the headers.
 * @param {Object[]} [config.rows] - Row objects keyed by column.
 * @returns {*} Whatever XLSX.write returns for `{ bookType: "xlsx", type: "buffer" }`.
 */
function buildXlsxContent(config) {
  // Lazy require so the rest of the module stays usable without deps (e.g. pure parsing tests).
  // In this repo we install it via package.json.
  // eslint-disable-next-line global-require, import/no-extraneous-dependencies
  const XLSX = require("xlsx");

  const sheetName = sanitizeSheetName(config.sheetName || "达人数据");
  const columns = Array.isArray(config.columns) ? config.columns : [];
  const hasMatchingHeaders =
    Array.isArray(config.headers) && config.headers.length === columns.length;
  const headers = hasMatchingHeaders ? config.headers : columns;
  const rows = Array.isArray(config.rows) ? config.rows : [];

  // Array-of-arrays: header row first, then one array of cell texts per row.
  // The header row is copied so the caller's array is not handed to the library.
  const sheetData = [
    [...headers],
    ...rows.map((row) => columns.map((column) => normalizeScalar(row[column]))),
  ];

  const worksheet = XLSX.utils.aoa_to_sheet(sheetData);
  const workbook = XLSX.utils.book_new();
  XLSX.utils.book_append_sheet(workbook, worksheet, sheetName);
  return XLSX.write(workbook, { bookType: "xlsx", type: "buffer" });
}

/**
 * Formats a date as "YYYYMMDD-HHmmss" in local time.
 *
 * The current time is used when `date` is not a Date or is an invalid Date
 * (which would otherwise format as "NaN…"). The year is padded to four digits.
 *
 * @param {Date} [date] - Date to format.
 * @returns {string} The formatted timestamp.
 */
function formatTimestamp(date) {
  const isUsable = date instanceof Date && !Number.isNaN(date.getTime());
  const when = isUsable ? date : new Date();
  const pad = (number, width = 2) => String(number).padStart(width, "0");

  const day = [
    pad(when.getFullYear(), 4),
    pad(when.getMonth() + 1), // getMonth() is zero-based
    pad(when.getDate()),
  ].join("");
  const time = [
    pad(when.getHours()),
    pad(when.getMinutes()),
    pad(when.getSeconds()),
  ].join("");

  return `${day}-${time}`;
}

/**
 * Picks the payload object out of a parsed JSON response.
 *
 * Preference order: `json.data`, then `json.result`, then `json` itself; the
 * first that is a plain object is returned. Anything else is wrapped as
 * `{ value: json }` so the caller always receives an object.
 *
 * @param {*} json - Parsed response body.
 * @returns {Object} The payload object.
 */
function unwrapResponsePayload(json) {
  const candidates = [json?.data, json?.result, json];
  const payload = candidates.find((candidate) => isPlainObject(candidate));
  return payload ?? { value: json };
}

/**
 * Fetches the primary record for one blogger.
 *
 * Sends a GET to `API_BASE + encodeURIComponent(id)` with credentials
 * included, unwraps the JSON payload, and sets `id` on the payload when the
 * payload has no own `id` property.
 *
 * @param {string} id - Blogger ID.
 * @param {Function} fetchImpl - fetch-compatible function.
 * @returns {Promise<Object>} The unwrapped payload.
 * @throws {Error} When `fetchImpl` is not a function, the response is missing
 *   or not ok, or the body cannot be parsed as JSON (original error in `cause`).
 */
async function fetchBloggerRecord(id, fetchImpl) {
  if (typeof fetchImpl !== "function") {
    throw new Error("当前环境不支持 fetch,无法请求达人数据。");
  }

  const response = await fetchImpl(`${API_BASE}${encodeURIComponent(id)}`, {
    method: "GET",
    credentials: "include",
    headers: {
      accept: "application/json, text/plain, */*",
    },
  });

  if (!response || !response.ok) {
    const status = response ? response.status : "unknown";
    throw new Error(`请求达人 ${id} 失败,状态码:${status}`);
  }

  let json;
  try {
    json = await response.json();
  } catch (error) {
    // An ok response with a non-JSON body would otherwise surface as a bare
    // SyntaxError that does not say which blogger was being requested.
    throw new Error(`解析达人 ${id} 的响应失败`, { cause: error });
  }

  const payload = unwrapResponsePayload(json);
  if (!Object.prototype.hasOwnProperty.call(payload, "id")) {
    payload.id = id;
  }
  return payload;
}

/**
 * Fetches one supplemental payload for a user.
 *
 * Sends a GET to `config.buildUrl(userId)` with credentials included and
 * returns the unwrapped JSON payload.
 *
 * @param {string} userId - User ID passed to `config.buildUrl`.
 * @param {Function} fetchImpl - fetch-compatible function.
 * @param {{namespace: string, buildUrl: Function}} config - Endpoint descriptor.
 * @returns {Promise<Object>} The unwrapped payload.
 * @throws {Error} When the response is missing or not ok, or the body cannot
 *   be parsed as JSON (original error in `cause`).
 */
async function fetchSupplementalPayload(userId, fetchImpl, config) {
  const response = await fetchImpl(config.buildUrl(userId), {
    method: "GET",
    credentials: "include",
    headers: {
      accept: "application/json, text/plain, */*",
    },
  });

  if (!response || !response.ok) {
    const status = response ? response.status : "unknown";
    throw new Error(
      `请求补充数据 ${config.namespace} 失败,userId=${userId},状态码:${status}`,
    );
  }

  let json;
  try {
    json = await response.json();
  } catch (error) {
    throw new Error(
      `解析补充数据 ${config.namespace} 的响应失败,userId=${userId}`,
      { cause: error },
    );
  }
  return unwrapResponsePayload(json);
}

/**
 * Fetches a blogger's primary record and merges the supplemental payloads
 * into a shallow copy of it.
 *
 * The supplemental requests use `primary.userId`, falling back to
 * `primary.id`, then to `id`. They are started together and are best-effort:
 * a failed request is skipped and its namespace key is simply not set. Each
 * successful payload is stored under its endpoint's `namespace`, replacing any
 * same-named key from the primary record.
 *
 * @param {string} id - Blogger ID.
 * @param {Function} fetchImpl - fetch-compatible function.
 * @returns {Promise<Object>} The merged record.
 * @throws {Error} When the primary request fails.
 */
async function fetchMergedBloggerRecord(id, fetchImpl) {
  const primary = await fetchBloggerRecord(id, fetchImpl);
  const userId = primary.userId || primary.id || id;

  // allSettled keeps results in input order, so index i belongs to endpoint i.
  const outcomes = await Promise.allSettled(
    SUPPLEMENTAL_ENDPOINTS.map((config) =>
      fetchSupplementalPayload(userId, fetchImpl, config),
    ),
  );

  const merged = { ...primary };
  outcomes.forEach((outcome, index) => {
    if (outcome.status === "fulfilled") {
      merged[SUPPLEMENTAL_ENDPOINTS[index].namespace] = outcome.value;
    }
  });

  return merged;
}

/**
 * Creates a controller that previews blogger data and exports it as .xlsx.
 *
 * The controller caches the records and field options of the most recent
 * successful `preview`; `exportSheet` works from that cache.
 *
 * @param {Object} [options]
 * @param {Function} [options.fetchImpl] - fetch-compatible function used for all requests.
 * @param {Function} [options.now] - Returns the Date used in the export
 *   filename; defaults to the current time.
 * @returns {{preview: Function, exportSheet: Function, getState: Function}}
 */
function createExportController(options) {
  const settings = options || {};
  const now = settings.now || (() => new Date());
  const { fetchImpl } = settings;
  let cachedRecords = [];
  let cachedFields = [];

  /**
   * Parses the input, fetches every blogger and caches the result.
   *
   * @param {*} rawInput - Text containing blogger IDs and/or profile URLs.
   * @returns {Promise<{ids: string[], records: Object[], fields: Object[], selectedFields: string[]}>}
   * @throws {Error} When no valid ID is found or a primary request fails.
   */
  async function preview(rawInput) {
    const ids = parseCreatorInputs(rawInput);
    if (!ids.length) {
      throw new Error("请输入至少一个有效的达人主页链接或达人 ID。");
    }

    // Bloggers are fetched one at a time, in input order.
    const records = [];
    for (const id of ids) {
      const raw = await fetchMergedBloggerRecord(id, fetchImpl);
      records.push({ id, raw, flattened: flattenRecord(raw) });
    }

    // The cache is replaced only after every fetch succeeded, so a failed
    // preview leaves the previous state untouched.
    cachedRecords = records;
    cachedFields = buildFieldOptions(records);

    return {
      ids,
      records,
      fields: cachedFields,
      selectedFields: pickDefaultFields(cachedFields),
    };
  }

  /**
   * Builds the .xlsx export from the cached records.
   *
   * @param {string[]} [selectedFields] - Field paths to export; all cached
   *   field options are exported when omitted or empty.
   * @returns {{filename: string, columns: string[], headers: string[], rows: Object[], content: *}}
   * @throws {Error} When no records have been cached by `preview`.
   */
  function exportSheet(selectedFields) {
    if (!cachedRecords.length) {
      throw new Error("请先读取字段并确认达人数据。");
    }

    const hasSelection = Array.isArray(selectedFields) && selectedFields.length;
    const fields = hasSelection
      ? selectedFields
      : cachedFields.map((field) => field.path);

    const rows = buildExportRows(cachedRecords, fields);
    const headers = fields.map((field) => getFieldLabel(field));
    const content = buildXlsxContent({
      columns: fields,
      headers,
      rows,
      sheetName: "达人数据",
    });

    return {
      filename: `xhs-bloggers-${formatTimestamp(now())}.xlsx`,
      columns: fields,
      headers,
      rows,
      content,
    };
  }

  /**
   * Returns shallow copies of the cached records and field options.
   *
   * @returns {{records: Object[], fields: Object[]}}
   */
  function getState() {
    return {
      records: cachedRecords.slice(),
      fields: cachedFields.slice(),
    };
  }

  return { preview, exportSheet, getState };
}

module.exports = {
|
||
API_BASE,
|
||
SUPPLEMENTAL_ENDPOINTS,
|
||
buildExportRows,
|
||
buildCsvContent,
|
||
buildFieldOptions,
|
||
buildSpreadsheetXml,
|
||
buildXlsxContent,
|
||
createExportController,
|
||
extractBloggerId,
|
||
fetchMergedBloggerRecord,
|
||
flattenRecord,
|
||
getFieldLabel,
|
||
parseCreatorInputs,
|
||
};
|