scriptCat/pugongying/src/xhs-pgy-export-core.js

596 lines
16 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Base URL of the primary blogger endpoint; fetchBloggerRecord appends the
// URI-encoded blogger ID directly after the trailing slash.
const API_BASE = "https://pgy.xiaohongshu.com/api/solar/cooperator/user/blogger/";
// Secondary endpoints queried per blogger. Each entry names the key
// (`namespace`) under which its payload is merged into the blogger record and
// provides a `buildUrl(userId)` that returns the request URL with the user ID
// URI-encoded.
const SUPPLEMENTAL_ENDPOINTS = [
  {
    namespace: "dataSummary",
    buildUrl: (userId) => {
      const encodedUserId = encodeURIComponent(userId);
      return `https://pgy.xiaohongshu.com/api/pgy/kol/data/data_summary?userId=${encodedUserId}&business=1`;
    },
  },
  {
    namespace: "fansSummary",
    buildUrl: (userId) => {
      const encodedUserId = encodeURIComponent(userId);
      return `https://pgy.xiaohongshu.com/api/solar/kol/data_v3/fans_summary?userId=${encodedUserId}`;
    },
  },
  {
    namespace: "fansProfile",
    buildUrl: (userId) => {
      const encodedUserId = encodeURIComponent(userId);
      return `https://pgy.xiaohongshu.com/api/solar/kol/data/${encodedUserId}/fans_profile`;
    },
  },
];
// Display label for each supplemental namespace. getFieldLabel uses these to
// label nested paths that have no dedicated entry in FIELD_LABEL_MAP.
const NAMESPACE_LABEL_MAP = {
  dataSummary: "数据概览",
  fansSummary: "粉丝概览",
  fansProfile: "粉丝画像",
};
// Display labels keyed by flattened (dot-separated) field path.
// buildFieldOptions only offers paths that have an entry here.
const FIELD_LABEL_MAP = {
  // Identity and top-level metrics.
  id: "ID",
  "metrics.fans": "粉丝数",

  // Namespace roots.
  dataSummary: "数据概览",
  fansSummary: "粉丝概览",
  fansProfile: "粉丝画像",

  // dataSummary.*
  "dataSummary.fans30GrowthRate": "近30天涨粉率",
  "dataSummary.mAccumImpCompare": "曝光中位数超越率",
  "dataSummary.noteType": "笔记内容类型",
  "dataSummary.activeDayInLast7": "近7天活跃天数",
  "dataSummary.responseRate": "响应率",
  "dataSummary.avgRead": "平均阅读量",

  // fansProfile.*
  "fansProfile.ages": "粉丝年龄分布",
  "fansProfile.gender.male": "粉丝男性占比",
  "fansProfile.gender.female": "粉丝女性占比",
  "fansProfile.interests": "粉丝兴趣分布",
  "fansProfile.provinces": "粉丝省份分布",
  "fansProfile.cities": "粉丝城市分布",
  "fansProfile.devices": "粉丝设备分布",
  "fansProfile.dateKey": "画像日期",

  // fansSummary.*
  "fansSummary.fansNum": "粉丝总数",
  "fansSummary.fansIncreaseNum": "涨粉数",
  "fansSummary.fansGrowthRate": "粉丝增长率",
  "fansSummary.fansGrowthBeyondRate": "粉丝增长超越率",
  "fansSummary.activeFansL28": "近28天活跃粉丝数",
  "fansSummary.activeFansRate": "活跃粉丝占比",
  "fansSummary.activeFansBeyondRate": "活跃粉丝超越率",
  "fansSummary.engageFansRate": "互动粉丝占比",
  "fansSummary.engageFansL30": "近30天互动粉丝数",
  "fansSummary.engageFansBeyondRate": "互动粉丝超越率",
  "fansSummary.readFansIn30": "近30天阅读粉丝数",
  "fansSummary.readFansRate": "阅读粉丝占比",
  "fansSummary.readFansBeyondRate": "阅读粉丝超越率",
  "fansSummary.payFansUserRate30d": "近30天支付粉丝占比",
  "fansSummary.payFansUserNum30d": "近30天支付粉丝数",

  // Top-level fields of the primary blogger payload.
  userId: "达人ID",
  fansCount: "粉丝数",
  name: "达人昵称",
  redId: "小红书号",
  location: "地区",
  travelAreaList: "常驻地区",
  personalTags: "人设标签",
  contentTags: "内容标签",
  likeCollectCountInfo: "获赞与收藏",
  businessNoteCount: "商业笔记数",
  totalNoteCount: "总笔记数",
  picturePrice: "图文报价",
  videoPrice: "视频报价",
  lowerPrice: "最低报价",
  userType: "用户类型",
  tradeType: "合作行业",
  clickMidNum: "阅读中位数",
  accumCoopImpMedinNum30d: "近30天合作曝光中位数",
  mEngagementNum: "互动中位数",
  "clothingIndustryPrice.picturePrice": "服饰行业图文报价",
};
/**
 * Report whether `value` is tagged "[object Object]" by
 * Object.prototype.toString (arrays, null, dates and primitives are not).
 *
 * @param {*} value - Value to classify.
 * @returns {boolean} True when the tag is "[object Object]".
 */
function isPlainObject(value) {
  const tag = Object.prototype.toString.call(value);
  return tag === "[object Object]";
}
/**
 * Convert any value to its string form for display/export.
 *
 * - null / undefined -> ""
 * - strings          -> trimmed
 * - Date instances   -> ISO-8601 via toISOString()
 * - everything else  -> String(value) (numbers, booleans, bigints, objects)
 *
 * @param {*} value - Value to normalize.
 * @returns {string} The normalized text.
 */
function normalizeScalar(value) {
  if (value === null || value === undefined) {
    return "";
  }
  if (typeof value === "string") {
    return value.trim();
  }
  // Primitives are never Date instances, so checking Date before the generic
  // String() conversion yields the same result for every input type.
  return value instanceof Date ? value.toISOString() : String(value);
}
/**
 * Collapse an array into a single " | "-separated string.
 *
 * Plain objects and nested arrays are serialized with JSON.stringify (falling
 * back to String() if serialization throws); every other item goes through
 * normalizeScalar. Items that produce an empty string are omitted.
 *
 * @param {*} list - Candidate array.
 * @returns {string} Joined summary, or "" for non-arrays and empty arrays.
 */
function summarizeArray(list) {
  if (!Array.isArray(list) || list.length === 0) {
    return "";
  }

  const parts = [];
  for (const item of list) {
    let text;
    if (isPlainObject(item) || Array.isArray(item)) {
      try {
        text = JSON.stringify(item);
      } catch (error) {
        text = String(item);
      }
    } else {
      // Scalars (and any other non-container value) share one conversion path.
      text = normalizeScalar(item);
    }
    if (text) {
      parts.push(text);
    }
  }
  return parts.join(" | ");
}
/**
 * Flatten a nested record into a single-level object keyed by dot-separated
 * paths, with every leaf converted to a string.
 *
 * Arrays become one summarized string (summarizeArray), nested plain objects
 * are recursed into, and an empty nested object is recorded as "".
 *
 * @param {*} record - Value to flatten; non-objects are stored under `prefix`.
 * @param {string} [prefix] - Path accumulated so far ("" at the root).
 * @param {Object} [target] - Accumulator to write into; a new object if omitted.
 * @returns {Object} The accumulator (`target` when one was supplied).
 */
function flattenRecord(record, prefix, target) {
  const output = target || {};
  const basePath = prefix || "";

  if (!isPlainObject(record)) {
    // A non-object at the root has no path to be stored under and is dropped.
    if (basePath) {
      output[basePath] = normalizeScalar(record);
    }
    return output;
  }

  const entries = Object.entries(record);
  if (entries.length === 0) {
    if (basePath) {
      output[basePath] = "";
    }
    return output;
  }

  for (const [key, value] of entries) {
    const path = basePath ? `${basePath}.${key}` : key;
    if (Array.isArray(value)) {
      output[path] = summarizeArray(value);
    } else if (isPlainObject(value)) {
      flattenRecord(value, path, output);
    } else {
      output[path] = normalizeScalar(value);
    }
  }
  return output;
}
/**
 * Extract a 24-character hexadecimal blogger ID from a raw ID or an
 * http(s) URL.
 *
 * Lookup order for URLs: the query parameters id, user_id, userId, bloggerId,
 * creatorId (first match wins), then path segments scanned from last to first.
 *
 * @param {*} value - Raw ID or profile URL.
 * @returns {string} The ID as written in the input, or "" when none is found.
 */
function extractBloggerId(value) {
  const isBloggerId = (text) => /^[0-9a-f]{24}$/i.test(text);

  const input = normalizeScalar(value);
  if (!input) {
    return "";
  }
  if (isBloggerId(input)) {
    return input;
  }
  if (!/^https?:\/\//i.test(input)) {
    return "";
  }

  let url;
  try {
    url = new URL(input);
  } catch (error) {
    return "";
  }

  for (const paramName of ["id", "user_id", "userId", "bloggerId", "creatorId"]) {
    const candidate = url.searchParams.get(paramName);
    if (candidate && isBloggerId(candidate)) {
      return candidate;
    }
  }

  // Walk the path backwards so the right-most ID-shaped segment wins.
  const pathSegments = url.pathname.split("/");
  for (let index = pathSegments.length - 1; index >= 0; index -= 1) {
    const segment = pathSegments[index].trim();
    if (isBloggerId(segment)) {
      return segment;
    }
  }
  return "";
}
/**
 * Split free-form user input (separated by whitespace, newlines or commas)
 * into a de-duplicated list of blogger IDs, preserving first-seen order.
 * Tokens from which no ID can be extracted are ignored.
 *
 * @param {*} rawInput - Text containing IDs and/or profile URLs.
 * @returns {string[]} Unique blogger IDs in input order.
 */
function parseCreatorInputs(rawInput) {
  const tokens = normalizeScalar(rawInput).split(/[\n,\s]+/);

  // A Set keeps insertion order and silently ignores repeats.
  const uniqueIds = new Set();
  for (const token of tokens) {
    const candidate = token.trim();
    if (!candidate) {
      continue;
    }
    const id = extractBloggerId(candidate);
    if (id) {
      uniqueIds.add(id);
    }
  }
  return [...uniqueIds];
}
/**
 * Collect the selectable export fields present in the given records.
 *
 * Only flattened paths that have their own, non-empty entry in
 * FIELD_LABEL_MAP are offered; each path appears once, and the result is
 * sorted by path using the "zh-CN" locale.
 *
 * @param {Array<{flattened?: Object}>} records - Records produced by preview().
 *   A non-array argument is treated as an empty list.
 * @returns {Array<{path: string, label: string}>} Sorted field options.
 */
function buildFieldOptions(records) {
  // Own-property check: a bare `FIELD_LABEL_MAP[path]` is truthy for inherited
  // keys such as "constructor" or "toString", which would otherwise surface as
  // fields whose label is a function.
  const hasOwnLabel = (path) =>
    Object.prototype.hasOwnProperty.call(FIELD_LABEL_MAP, path) &&
    Boolean(FIELD_LABEL_MAP[path]);

  const fieldMap = new Map();
  for (const record of Array.isArray(records) ? records : []) {
    const flattened = record?.flattened || {};
    for (const path of Object.keys(flattened)) {
      if (!hasOwnLabel(path) || fieldMap.has(path)) {
        continue;
      }
      fieldMap.set(path, { path, label: getFieldLabel(path) });
    }
  }

  return Array.from(fieldMap.values()).sort((left, right) =>
    left.path.localeCompare(right.path, "zh-CN"),
  );
}
/**
 * Resolve the display label for a flattened field path.
 *
 * Resolution order:
 *   1. an own, non-empty entry in FIELD_LABEL_MAP;
 *   2. a namespace from NAMESPACE_LABEL_MAP, either exactly (namespace label)
 *      or as a prefix ("<namespace label> - <rest of path>");
 *   3. the path itself.
 *
 * @param {string} path - Dot-separated field path.
 * @returns {string} The label to display for the path.
 */
function getFieldLabel(path) {
  // Own-property lookup only: indexing the map directly would return inherited
  // Object.prototype members (functions) for paths like "constructor".
  const hasOwnLabel = Object.prototype.hasOwnProperty.call(FIELD_LABEL_MAP, path);
  if (hasOwnLabel && FIELD_LABEL_MAP[path]) {
    return FIELD_LABEL_MAP[path];
  }

  for (const [namespace, namespaceLabel] of Object.entries(NAMESPACE_LABEL_MAP)) {
    if (path === namespace) {
      return namespaceLabel;
    }
    if (path.startsWith(`${namespace}.`)) {
      return `${namespaceLabel} - ${path.slice(namespace.length + 1)}`;
    }
  }

  // No mapping matched (the map was already consulted above).
  return path;
}
/**
 * Choose the fields that are pre-selected by default: the paths of the first
 * twelve options, in the order given.
 *
 * @param {Array<{path: string}>} fieldOptions - Available field options.
 * @returns {string[]} Paths of at most twelve leading options.
 */
function pickDefaultFields(fieldOptions) {
  const defaultCount = 12;
  const leadingOptions = fieldOptions.slice(0, defaultCount);
  return leadingOptions.map(({ path }) => path);
}
/**
 * Project each record onto the selected fields.
 *
 * Every returned row has exactly the selected fields as keys. A field that the
 * record's `flattened` map does not own, or whose value is null/undefined,
 * is exported as "". Other values, including 0 and false, are kept as-is.
 *
 * @param {Array<{flattened?: Object}>} records - Records to export.
 * @param {string[]} selectedFields - Flattened paths to include, in order.
 * @returns {Object[]} One row object per record.
 */
function buildExportRows(records, selectedFields) {
  return records.map((record) => {
    // Tolerate records without a flattened map, as buildFieldOptions does.
    const flattened = record?.flattened || {};
    const row = {};
    for (const field of selectedFields) {
      // Own-property check keeps inherited members (e.g. "toString") out of rows.
      const isOwn = Object.prototype.hasOwnProperty.call(flattened, field);
      // `??` rather than `||` so that 0 / false are not blanked out.
      row[field] = isOwn ? (flattened[field] ?? "") : "";
    }
    return row;
  });
}
/**
 * Escape the five XML-reserved characters (& < > " ') so a value can be
 * embedded in XML text or attribute content.
 *
 * @param {*} value - Value to escape; converted with String() first.
 * @returns {string} The escaped text.
 */
function escapeXml(value) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;",
  };
  // Single pass, so the "&" introduced by one replacement is never re-escaped.
  return String(value).replace(/[&<>"']/g, (character) => entities[character]);
}
/**
 * Produce a worksheet name from arbitrary input: the characters
 * \ / ? * : [ ] are replaced with "_", the result is cut to 31 characters,
 * and "Sheet1" is used whenever the name would otherwise be empty.
 *
 * @param {*} value - Requested sheet name.
 * @returns {string} The sanitized sheet name.
 */
function sanitizeSheetName(value) {
  const fallbackName = "Sheet1";
  const requested = normalizeScalar(value) || fallbackName;
  const cleaned = requested.replace(/[\\/?*:[\]]/g, "_");
  const truncated = cleaned.slice(0, 31);
  return truncated || fallbackName;
}
/**
 * Render rows as a SpreadsheetML (Excel 2003 XML) workbook with one sheet.
 * Every cell, including the header row, is written as a String cell.
 *
 * @param {Object} config
 * @param {string[]} [config.columns] - Row keys, in column order.
 * @param {string[]} [config.headers] - Header texts; used only when the length
 *   matches `columns`, otherwise the column keys serve as headers.
 * @param {Object[]} [config.rows] - Row objects keyed by column.
 * @param {string} [config.sheetName] - Sheet name (defaults to "达人数据").
 * @returns {string} The workbook XML document.
 */
function buildSpreadsheetXml(config) {
  const sheetName = sanitizeSheetName(config.sheetName || "达人数据");
  const columns = Array.isArray(config.columns) ? config.columns : [];
  const hasMatchingHeaders =
    Array.isArray(config.headers) && config.headers.length === columns.length;
  const headers = hasMatchingHeaders ? config.headers : columns;
  const rows = Array.isArray(config.rows) ? config.rows : [];

  const stringCell = (value) =>
    `<Cell><Data ss:Type="String">${escapeXml(value)}</Data></Cell>`;

  const headerCells = columns
    .map((column, index) => stringCell(headers[index] ?? column))
    .join("");

  const dataRows = rows
    .map((row) => {
      const cells = columns
        .map((column) => stringCell(row[column] === undefined ? "" : row[column]))
        .join("");
      return `<Row>${cells}</Row>`;
    })
    .join("");

  // One array element per output line; joined with "\n".
  return [
    `<?xml version="1.0" encoding="UTF-8"?>`,
    `<?mso-application progid="Excel.Sheet"?>`,
    `<Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet"`,
    `xmlns:o="urn:schemas-microsoft-com:office:office"`,
    `xmlns:x="urn:schemas-microsoft-com:office:excel"`,
    `xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"`,
    `xmlns:html="http://www.w3.org/TR/REC-html40">`,
    `<Worksheet ss:Name="${escapeXml(sheetName)}">`,
    `<Table>`,
    `<Row>${headerCells}</Row>`,
    dataRows,
    `</Table>`,
    `</Worksheet>`,
    `</Workbook>`,
  ].join("\n");
}
/**
 * Format one value as a CSV field: the value is normalized to text and, if it
 * contains a double quote, comma, CR or LF, wrapped in double quotes with
 * embedded quotes doubled.
 *
 * @param {*} value - Cell value.
 * @returns {string} The CSV-safe field text.
 */
function escapeCsvValue(value) {
  const text = normalizeScalar(value);
  const needsQuoting = /["\n,\r]/.test(text);
  if (!needsQuoting) {
    return text;
  }
  const doubledQuotes = text.replace(/"/g, '""');
  return `"${doubledQuotes}"`;
}
/**
 * Render rows as CSV text: a UTF-8 BOM, a header line, then one line per row,
 * separated by CRLF.
 *
 * @param {Object} config
 * @param {string[]} [config.columns] - Row keys, in column order.
 * @param {string[]} [config.headers] - Header texts; used only when the length
 *   matches `columns`, otherwise the column keys serve as headers.
 * @param {Object[]} [config.rows] - Row objects keyed by column.
 * @returns {string} The CSV document, prefixed with "\uFEFF".
 */
function buildCsvContent(config) {
  const columns = Array.isArray(config.columns) ? config.columns : [];
  const rows = Array.isArray(config.rows) ? config.rows : [];
  const hasMatchingHeaders =
    Array.isArray(config.headers) && config.headers.length === columns.length;
  const headers = hasMatchingHeaders ? config.headers : columns;

  const toLine = (cells) => cells.map((cell) => escapeCsvValue(cell)).join(",");

  const headerLine = toLine(headers);
  const bodyLines = rows.map((row) =>
    toLine(columns.map((column) => (row[column] === undefined ? "" : row[column]))),
  );

  const lines = [headerLine, ...bodyLines];
  return `\uFEFF${lines.join("\r\n")}`;
}
/**
 * Render rows as an .xlsx workbook with a single sheet, using the `xlsx`
 * package. All body cells are written as normalized strings.
 *
 * @param {Object} config
 * @param {string[]} [config.columns] - Row keys, in column order.
 * @param {string[]} [config.headers] - Header texts; used only when the length
 *   matches `columns`, otherwise the column keys serve as headers.
 * @param {Object[]} [config.rows] - Row objects keyed by column.
 * @param {string} [config.sheetName] - Sheet name (defaults to "达人数据").
 * @returns {*} Whatever XLSX.write returns for `type: "buffer"`.
 */
function buildXlsxContent(config) {
  // Lazy require so the rest of the module stays usable without deps (e.g. pure parsing tests).
  // In this repo we install it via package.json.
  // eslint-disable-next-line global-require, import/no-extraneous-dependencies
  const XLSX = require("xlsx");

  const sheetName = sanitizeSheetName(config.sheetName || "达人数据");
  const columns = Array.isArray(config.columns) ? config.columns : [];
  const hasMatchingHeaders =
    Array.isArray(config.headers) && config.headers.length === columns.length;
  const headers = hasMatchingHeaders ? config.headers : columns;
  const rows = Array.isArray(config.rows) ? config.rows : [];

  // Array-of-arrays: a copy of the header row first, then one array per row.
  const sheetData = [headers.slice()];
  for (const row of rows) {
    const cells = columns.map((column) => {
      const cell = row[column];
      return normalizeScalar(cell === undefined ? "" : cell);
    });
    sheetData.push(cells);
  }

  const worksheet = XLSX.utils.aoa_to_sheet(sheetData);
  const workbook = XLSX.utils.book_new();
  XLSX.utils.book_append_sheet(workbook, worksheet, sheetName);
  return XLSX.write(workbook, { bookType: "xlsx", type: "buffer" });
}
/**
 * Format a date as "YYYYMMDD-HHmmss" in local time, for use in file names.
 *
 * @param {Date} [date] - Moment to format. Anything that is not a valid Date
 *   (including an "Invalid Date" instance) is replaced by the current time.
 * @returns {string} The formatted timestamp.
 */
function formatTimestamp(date) {
  // `instanceof Date` alone accepts Invalid Date, whose getters all return NaN
  // and would yield "NaNNaNNaN-NaNNaNNaN".
  const isUsableDate = date instanceof Date && !Number.isNaN(date.getTime());
  const safeDate = isUsableDate ? date : new Date();

  const twoDigits = (part) => String(part).padStart(2, "0");

  const datePart = [
    safeDate.getFullYear(),
    twoDigits(safeDate.getMonth() + 1), // getMonth() is zero-based
    twoDigits(safeDate.getDate()),
  ].join("");
  const timePart = [
    twoDigits(safeDate.getHours()),
    twoDigits(safeDate.getMinutes()),
    twoDigits(safeDate.getSeconds()),
  ].join("");

  return `${datePart}-${timePart}`;
}
/**
 * Pick the object payload out of a parsed API response.
 *
 * Preference order: `json.data`, then `json.result`, then `json` itself —
 * the first of these that is a plain object is returned. If none is, the raw
 * value is wrapped as `{ value: json }`.
 *
 * @param {*} json - Parsed response body.
 * @returns {Object} The payload object.
 */
function unwrapResponsePayload(json) {
  const candidates = [json?.data, json?.result, json];
  for (const candidate of candidates) {
    if (isPlainObject(candidate)) {
      return candidate;
    }
  }
  return { value: json };
}
/**
 * Fetch the primary record for one blogger.
 *
 * Issues a credentialed GET to `${API_BASE}<encoded id>`, unwraps the JSON
 * payload, and adds `id` to the payload when it has no own `id` property.
 *
 * @param {string} id - Blogger ID.
 * @param {Function} fetchImpl - fetch-compatible function.
 * @returns {Promise<Object>} The unwrapped payload.
 * @throws {Error} If `fetchImpl` is not a function or the response is missing
 *   or not ok.
 */
async function fetchBloggerRecord(id, fetchImpl) {
  if (typeof fetchImpl !== "function") {
    throw new Error("当前环境不支持 fetch无法请求达人数据。");
  }

  const requestUrl = `${API_BASE}${encodeURIComponent(id)}`;
  const requestInit = {
    method: "GET",
    credentials: "include",
    headers: {
      accept: "application/json, text/plain, */*",
    },
  };

  const response = await fetchImpl(requestUrl, requestInit);
  if (!response?.ok) {
    const status = response ? response.status : "unknown";
    throw new Error(`请求达人 ${id} 失败,状态码:${status}`);
  }

  const payload = unwrapResponsePayload(await response.json());
  const hasOwnId = Object.prototype.hasOwnProperty.call(payload, "id");
  if (!hasOwnId) {
    payload.id = id;
  }
  return payload;
}
/**
 * Fetch one supplemental payload for a user.
 *
 * Issues a credentialed GET to `config.buildUrl(userId)` and returns the
 * unwrapped JSON payload.
 *
 * @param {string} userId - User ID passed to the endpoint's URL builder.
 * @param {Function} fetchImpl - fetch-compatible function.
 * @param {{namespace: string, buildUrl: Function}} config - Endpoint entry.
 * @returns {Promise<Object>} The unwrapped payload.
 * @throws {Error} If the response is missing or not ok.
 */
async function fetchSupplementalPayload(userId, fetchImpl, config) {
  const requestUrl = config.buildUrl(userId);
  const requestInit = {
    method: "GET",
    credentials: "include",
    headers: {
      accept: "application/json, text/plain, */*",
    },
  };

  const response = await fetchImpl(requestUrl, requestInit);
  if (!response?.ok) {
    const status = response ? response.status : "unknown";
    throw new Error(
      `请求补充数据 ${config.namespace} 失败userId=${userId},状态码:${status}`,
    );
  }

  return unwrapResponsePayload(await response.json());
}
/**
 * Fetch a blogger's primary record and merge in all supplemental payloads.
 *
 * The primary request must succeed. Supplemental requests run concurrently
 * and are best-effort: each fulfilled payload is stored under its endpoint's
 * namespace, while rejected ones are skipped without raising.
 *
 * @param {string} id - Blogger ID.
 * @param {Function} fetchImpl - fetch-compatible function.
 * @returns {Promise<Object>} Shallow copy of the primary payload plus one key
 *   per successful supplemental endpoint.
 */
async function fetchMergedBloggerRecord(id, fetchImpl) {
  const primary = await fetchBloggerRecord(id, fetchImpl);
  const userId = primary.userId || primary.id || id;

  const outcomes = await Promise.allSettled(
    SUPPLEMENTAL_ENDPOINTS.map(async (endpoint) => {
      const payload = await fetchSupplementalPayload(userId, fetchImpl, endpoint);
      return { namespace: endpoint.namespace, payload };
    }),
  );

  const merged = { ...primary };
  for (const outcome of outcomes) {
    if (outcome.status === "fulfilled") {
      const { namespace, payload } = outcome.value;
      merged[namespace] = payload;
    }
  }
  return merged;
}
/**
 * Create a controller that previews blogger data and exports it as .xlsx.
 *
 * The controller caches the records and field options of the most recent
 * successful preview(); exportSheet() works from that cache.
 *
 * @param {Object} [options]
 * @param {Function} [options.now] - Returns the Date used in the export file
 *   name; defaults to the current time.
 * @param {Function} [options.fetchImpl] - fetch-compatible function.
 * @returns {{preview: Function, exportSheet: Function, getState: Function}}
 */
function createExportController(options) {
  const { now: nowOption, fetchImpl } = options || {};
  const now = nowOption || (() => new Date());

  let cachedRecords = [];
  let cachedFields = [];

  /**
   * Parse the input, fetch every blogger one after another, and cache the
   * results. Throws when the input contains no valid ID or a primary fetch fails.
   */
  async function preview(rawInput) {
    const ids = parseCreatorInputs(rawInput);
    if (ids.length === 0) {
      throw new Error("请输入至少一个有效的达人主页链接或达人 ID。");
    }

    const records = [];
    for (const id of ids) {
      const raw = await fetchMergedBloggerRecord(id, fetchImpl);
      const flattened = flattenRecord(raw);
      records.push({ id, raw, flattened });
    }

    cachedRecords = records;
    cachedFields = buildFieldOptions(records);

    return {
      ids,
      records,
      fields: cachedFields,
      selectedFields: pickDefaultFields(cachedFields),
    };
  }

  /**
   * Build the .xlsx export for the cached records. With no (or an empty)
   * selection, all cached fields are exported. Throws if nothing is cached.
   */
  function exportSheet(selectedFields) {
    if (cachedRecords.length === 0) {
      throw new Error("请先读取字段并确认达人数据。");
    }

    const hasSelection = Array.isArray(selectedFields) && selectedFields.length > 0;
    const fields = hasSelection
      ? selectedFields
      : cachedFields.map(({ path }) => path);

    const rows = buildExportRows(cachedRecords, fields);
    const headers = fields.map((field) => getFieldLabel(field));
    const content = buildXlsxContent({
      columns: fields,
      headers,
      rows,
      sheetName: "达人数据",
    });
    const filename = `xhs-bloggers-${formatTimestamp(now())}.xlsx`;

    return { filename, columns: fields, headers, rows, content };
  }

  /** Return shallow copies of the cached records and field options. */
  function getState() {
    return {
      records: cachedRecords.slice(),
      fields: cachedFields.slice(),
    };
  }

  return { preview, exportSheet, getState };
}
// Public API of this CommonJS module: endpoint constants, the pure
// parsing/flattening/formatting helpers, the merged fetch routine, and the
// export controller factory.
module.exports = {
API_BASE,
SUPPLEMENTAL_ENDPOINTS,
buildExportRows,
buildCsvContent,
buildFieldOptions,
buildSpreadsheetXml,
buildXlsxContent,
createExportController,
extractBloggerId,
fetchMergedBloggerRecord,
flattenRecord,
getFieldLabel,
parseCreatorInputs,
};