feat(pugongying): export xlsx via sheetjs

This commit is contained in:
wxs 2026-03-13 13:08:30 +08:00
parent 2ddd4bb5ca
commit 2a598d65fc
6 changed files with 226 additions and 109 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
**/node_modules/
**/.DS_Store

118
pugongying/package-lock.json generated Normal file
View File

@ -0,0 +1,118 @@
{
"name": "browser-script",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "browser-script",
"version": "1.0.0",
"dependencies": {
"xlsx": "^0.18.5"
}
},
"node_modules/adler-32": {
"version": "1.3.1",
"resolved": "https://registry.npmjs.org/adler-32/-/adler-32-1.3.1.tgz",
"integrity": "sha512-ynZ4w/nUUv5rrsR8UUGoe1VC9hZj6V5hU9Qw1HlMDJGEJw5S7TfTErWTjMys6M7vr0YWcPqs3qAr4ss0nDfP+A==",
"license": "Apache-2.0",
"engines": {
"node": ">=0.8"
}
},
"node_modules/cfb": {
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/cfb/-/cfb-1.2.2.tgz",
"integrity": "sha512-KfdUZsSOw19/ObEWasvBP/Ac4reZvAGauZhs6S/gqNhXhI7cKwvlH7ulj+dOEYnca4bm4SGo8C1bTAQvnTjgQA==",
"license": "Apache-2.0",
"dependencies": {
"adler-32": "~1.3.0",
"crc-32": "~1.2.0"
},
"engines": {
"node": ">=0.8"
}
},
"node_modules/codepage": {
"version": "1.15.0",
"resolved": "https://registry.npmjs.org/codepage/-/codepage-1.15.0.tgz",
"integrity": "sha512-3g6NUTPd/YtuuGrhMnOMRjFc+LJw/bnMp3+0r/Wcz3IXUuCosKRJvMphm5+Q+bvTVGcJJuRvVLuYba+WojaFaA==",
"license": "Apache-2.0",
"engines": {
"node": ">=0.8"
}
},
"node_modules/crc-32": {
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/crc-32/-/crc-32-1.2.2.tgz",
"integrity": "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ==",
"license": "Apache-2.0",
"bin": {
"crc32": "bin/crc32.njs"
},
"engines": {
"node": ">=0.8"
}
},
"node_modules/frac": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/frac/-/frac-1.1.2.tgz",
"integrity": "sha512-w/XBfkibaTl3YDqASwfDUqkna4Z2p9cFSr1aHDt0WoMTECnRfBOv2WArlZILlqgWlmdIlALXGpM2AOhEk5W3IA==",
"license": "Apache-2.0",
"engines": {
"node": ">=0.8"
}
},
"node_modules/ssf": {
"version": "0.11.2",
"resolved": "https://registry.npmjs.org/ssf/-/ssf-0.11.2.tgz",
"integrity": "sha512-+idbmIXoYET47hH+d7dfm2epdOMUDjqcB4648sTZ+t2JwoyBFL/insLfB/racrDmsKB3diwsDA696pZMieAC5g==",
"license": "Apache-2.0",
"dependencies": {
"frac": "~1.1.2"
},
"engines": {
"node": ">=0.8"
}
},
"node_modules/wmf": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/wmf/-/wmf-1.0.2.tgz",
"integrity": "sha512-/p9K7bEh0Dj6WbXg4JG0xvLQmIadrner1bi45VMJTfnbVHsc7yIajZyoSoK60/dtVBs12Fm6WkUI5/3WAVsNMw==",
"license": "Apache-2.0",
"engines": {
"node": ">=0.8"
}
},
"node_modules/word": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/word/-/word-0.3.0.tgz",
"integrity": "sha512-OELeY0Q61OXpdUfTp+oweA/vtLVg5VDOXh+3he3PNzLGG/y0oylSOC1xRVj0+l4vQ3tj/bB1HVHv1ocXkQceFA==",
"license": "Apache-2.0",
"engines": {
"node": ">=0.8"
}
},
"node_modules/xlsx": {
"version": "0.18.5",
"resolved": "https://registry.npmjs.org/xlsx/-/xlsx-0.18.5.tgz",
"integrity": "sha512-dmg3LCjBPHZnQp5/F/+nnTa+miPJxUXB6vtk42YjBBKayDNagxGEeIdWApkYPOf3Z3pm3k62Knjzp7lMeTEtFQ==",
"license": "Apache-2.0",
"dependencies": {
"adler-32": "~1.3.0",
"cfb": "~1.2.1",
"codepage": "~1.15.0",
"crc-32": "~1.2.1",
"ssf": "~0.11.2",
"wmf": "~1.0.1",
"word": "~0.3.0"
},
"bin": {
"xlsx": "bin/xlsx.njs"
},
"engines": {
"node": ">=0.8"
}
}
}
}

View File

@ -2,6 +2,9 @@
"name": "browser-script",
"version": "1.0.0",
"private": true,
"dependencies": {
"xlsx": "^0.18.5"
},
"scripts": {
"test": "node --test",
"test:coverage": "node --test --experimental-test-coverage"

View File

@ -376,6 +376,36 @@ function buildCsvContent(config) {
return `\uFEFF${[headerLine, ...bodyLines].join("\r\n")}`;
}
/**
 * Build an .xlsx workbook containing a single sheet from tabular export data.
 *
 * @param {object} config
 * @param {string[]} [config.columns] Keys read from each row, in output order.
 * @param {string[]} [config.headers] Header labels; used only when the array
 *   has the same length as `columns`, otherwise the column keys are used.
 * @param {object[]} [config.rows] Row objects indexed by column key.
 * @param {string} [config.sheetName] Sheet name; a falsy value falls back to
 *   the default name before being passed through `sanitizeSheetName`.
 * @returns {*} Whatever `XLSX.write` returns for `{ bookType: "xlsx", type: "buffer" }`.
 */
function buildXlsxContent(config) {
  // The dependency is resolved on first call rather than at module load, so
  // callers that never export xlsx (e.g. pure parsing tests) do not need it.
  // This repo installs it through package.json.
  // eslint-disable-next-line global-require, import/no-extraneous-dependencies
  const XLSX = require("xlsx");
  const sheetName = sanitizeSheetName(config.sheetName || "达人数据");

  const columns = Array.isArray(config.columns) ? config.columns : [];
  const rows = Array.isArray(config.rows) ? config.rows : [];
  const headersMatchColumns =
    Array.isArray(config.headers) && config.headers.length === columns.length;
  const headers = headersMatchColumns ? config.headers : columns;

  // A missing property becomes an empty string before normalization.
  const toCells = (row) =>
    columns.map((column) => normalizeScalar(row[column] === undefined ? "" : row[column]));

  // Array-of-arrays layout: first the header row, then one array per data row.
  const aoa = [headers.slice()];
  for (let index = 0; index < rows.length; index += 1) {
    aoa.push(toCells(rows[index]));
  }

  const workbook = XLSX.utils.book_new();
  const worksheet = XLSX.utils.aoa_to_sheet(aoa);
  XLSX.utils.book_append_sheet(workbook, worksheet, sheetName);
  return XLSX.write(workbook, { bookType: "xlsx", type: "buffer" });
}
function formatTimestamp(date) {
const safeDate = date instanceof Date ? date : new Date();
const parts = [
@ -523,7 +553,7 @@ function createExportController(options) {
const rows = buildExportRows(cachedRecords, fields);
const headers = fields.map((field) => getFieldLabel(field));
const content = buildSpreadsheetXml({
const content = buildXlsxContent({
columns: fields,
headers,
rows,
@ -531,7 +561,7 @@ function createExportController(options) {
});
return {
filename: `xhs-bloggers-${formatTimestamp(now())}.xls`,
filename: `xhs-bloggers-${formatTimestamp(now())}.xlsx`,
columns: fields,
headers,
rows,
@ -555,6 +585,7 @@ module.exports = {
buildCsvContent,
buildFieldOptions,
buildSpreadsheetXml,
buildXlsxContent,
createExportController,
extractBloggerId,
fetchMergedBloggerRecord,

View File

@ -192,14 +192,12 @@ test("createExportController previews and exports creator data", async () => {
);
const exported = controller.exportSheet(["id", "name", "metrics.fans"]);
assert.equal(exported.filename, "xhs-bloggers-20260312-160910.xls");
assert.equal(exported.filename, "xhs-bloggers-20260312-160910.xlsx");
assert.equal(exported.rows.length, 2);
assert.deepEqual(exported.headers, ["ID", "达人昵称", "粉丝数"]);
assert.match(exported.content, /<\?mso-application progid="Excel\.Sheet"\?>/);
assert.match(exported.content, /<Worksheet ss:Name="达人数据">/);
assert.match(exported.content, /达人昵称/);
assert.match(exported.content, /达人-08d5/);
assert.match(exported.content, /达人-3456/);
assert.ok(Buffer.isBuffer(exported.content));
assert.equal(exported.content[0], 0x50); // P
assert.equal(exported.content[1], 0x4b); // K
});
test("createExportController merges supplemental endpoint payloads into namespaced fields", async () => {

View File

@ -1,10 +1,11 @@
// ==UserScript==
// @name 小红书蒲公英达人信息导出
// @namespace https://pgy.xiaohongshu.com/
// @version 0.1.0
// @version 0.1.1
// @description 输入达人主页链接或达人 ID,勾选字段后导出 Excel
// @match https://pgy.xiaohongshu.com/*
// @grant none
// @require https://cdn.jsdelivr.net/npm/xlsx@0.18.5/dist/xlsx.full.min.js
// ==/UserScript==
(function bootstrap(root, factory) {
@ -301,74 +302,6 @@
});
}
/**
 * Render one value as a CSV field.
 *
 * The value is first converted to text by `normalizeScalar`. Text containing
 * a double quote, comma, CR or LF is wrapped in double quotes with every
 * embedded double quote doubled; any other text is returned unchanged.
 *
 * @param {*} value Raw cell value.
 * @returns {string} The CSV-safe field text.
 */
function escapeCsvValue(value) {
  const cell = normalizeScalar(value);
  const needsQuoting = /["\n,\r]/.test(cell);
  return needsQuoting ? `"${cell.replace(/"/g, '""')}"` : cell;
}
/**
 * Serialize tabular export data as CSV text.
 *
 * The output starts with a U+FEFF byte-order mark, and the header line and
 * data lines are joined with CRLF. Every field goes through `escapeCsvValue`.
 *
 * @param {object} config
 * @param {string[]} [config.columns] Keys read from each row, in output order.
 * @param {string[]} [config.headers] Header labels; used only when the array
 *   has the same length as `columns`, otherwise the column keys are used.
 * @param {object[]} [config.rows] Row objects indexed by column key.
 * @returns {string} The CSV document.
 */
function buildCsvContent(config) {
  const columns = Array.isArray(config.columns) ? config.columns : [];
  const rows = Array.isArray(config.rows) ? config.rows : [];

  let headers = columns;
  if (Array.isArray(config.headers) && config.headers.length === columns.length) {
    headers = config.headers;
  }

  // A property that is missing on a row is written as an empty field.
  const renderRow = (row) =>
    columns
      .map((column) => {
        const raw = row[column];
        return escapeCsvValue(raw === undefined ? "" : raw);
      })
      .join(",");

  const lines = [headers.map(escapeCsvValue).join(",")].concat(rows.map(renderRow));
  return `\uFEFF${lines.join("\r\n")}`;
}
/**
 * Serialize tabular export data as a SpreadsheetML (Excel 2003 XML) document
 * with one worksheet.
 *
 * Every header and data value is emitted as a String-typed cell after passing
 * through `escapeXml`.
 *
 * @param {object} config
 * @param {string[]} [config.columns] Keys read from each row, in output order.
 * @param {string[]} [config.headers] Header labels; used only when the array
 *   has the same length as `columns`, otherwise the column keys are used.
 * @param {object[]} [config.rows] Row objects indexed by column key.
 * @param {string} [config.sheetName] Worksheet name; any non-string value
 *   falls back to the default name.
 * @returns {string} The XML document text.
 */
function buildSpreadsheetXml(config) {
  const sheetName = typeof config.sheetName === "string" ? config.sheetName : "达人数据";
  const columns = Array.isArray(config.columns) ? config.columns : [];
  const rows = Array.isArray(config.rows) ? config.rows : [];
  const headersMatchColumns =
    Array.isArray(config.headers) && config.headers.length === columns.length;
  const headers = headersMatchColumns ? config.headers : columns;

  // Single place that knows the markup of a string cell.
  const toCell = (text) => `<Cell><Data ss:Type="String">${escapeXml(text)}</Data></Cell>`;

  // A null/undefined header label falls back to the column key itself.
  const headerCells = columns.reduce(
    (markup, column, position) => markup + toCell(headers[position] ?? column),
    "",
  );

  const dataRows = rows.reduce((markup, row) => {
    // A property that is missing on a row is written as an empty cell.
    const cells = columns.reduce(
      (rowMarkup, column) => rowMarkup + toCell(row[column] === undefined ? "" : row[column]),
      "",
    );
    return `${markup}<Row>${cells}</Row>`;
  }, "");

  return `<?xml version="1.0" encoding="UTF-8"?>
<?mso-application progid="Excel.Sheet"?>
<Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet"
xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:x="urn:schemas-microsoft-com:office:excel"
xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"
xmlns:html="http://www.w3.org/TR/REC-html40">
<Worksheet ss:Name="${escapeXml(sheetName)}">
<Table>
<Row>${headerCells}</Row>
${dataRows}
</Table>
</Worksheet>
</Workbook>`;
}
function formatTimestamp(date) {
const safeDate = date instanceof Date ? date : new Date();
const parts = [
@ -523,20 +456,24 @@
? selectedFields
: cachedFields.map((field) => field.path);
const rows = buildExportRows(cachedRecords, fields);
const headers = fields.map((field) => getFieldLabel(field));
const content = buildSpreadsheetXml({
columns: fields,
headers,
rows,
sheetName: "达人数据",
});
if (!root.XLSX) {
throw new Error("未加载 SheetJS无法导出 xlsx。");
}
const aoa = [headers.slice()];
for (const record of cachedRecords) {
aoa.push(fields.map((field) => record.flattened[field] || ""));
}
const ws = root.XLSX.utils.aoa_to_sheet(aoa);
const wb = root.XLSX.utils.book_new();
root.XLSX.utils.book_append_sheet(wb, ws, "达人数据");
const content = root.XLSX.write(wb, { bookType: "xlsx", type: "array" });
return {
filename: `xhs-bloggers-${formatTimestamp(now())}.xls`,
filename: `xhs-bloggers-${formatTimestamp(now())}.xlsx`,
columns: fields,
headers,
rows,
content,
};
},
@ -553,14 +490,6 @@
const headers = fields.map((field) => getFieldLabel(field));
const total = cachedRecords.length;
const headerCells = headers
.map((header) => `<Cell><Data ss:Type="String">${escapeXml(header)}</Data></Cell>`)
.join("");
const parts = [
`<?xml version="1.0" encoding="UTF-8"?>\n<?mso-application progid="Excel.Sheet"?>\n<Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet"\n xmlns:o="urn:schemas-microsoft-com:office:office"\n xmlns:x="urn:schemas-microsoft-com:office:excel"\n xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"\n xmlns:html="http://www.w3.org/TR/REC-html40">\n <Worksheet ss:Name="${escapeXml(
"达人数据",
)}">\n <Table>\n <Row>${headerCells}</Row>\n`,
];
const report = (percentage, message) => {
if (typeof onProgress !== "function") {
@ -569,21 +498,13 @@
onProgress(Math.max(0, Math.min(100, percentage)), message || "");
};
report(0, "正在生成 Excel...");
report(0, "正在生成 Excel(.xlsx)...");
const aoa = [headers.slice()];
const yieldEvery = 50;
for (let index = 0; index < total; index += 1) {
const record = cachedRecords[index];
const cells = fields
.map((field) => {
const value =
record && record.flattened && record.flattened[field] !== undefined
? record.flattened[field]
: "";
return `<Cell><Data ss:Type="String">${escapeXml(value)}</Data></Cell>`;
})
.join("");
parts.push(` <Row>${cells}</Row>\n`);
aoa.push(fields.map((field) => record.flattened[field] || ""));
const isLast = index === total - 1;
if (isLast || (index + 1) % yieldEvery === 0) {
@ -593,11 +514,15 @@
}
}
parts.push(" </Table>\n </Worksheet>\n</Workbook>");
const content = parts.join("");
report(100, "正在打包 xlsx...");
const XLSX = await ensureXlsx();
const ws = XLSX.utils.aoa_to_sheet(aoa);
const wb = XLSX.utils.book_new();
XLSX.utils.book_append_sheet(wb, ws, "达人数据");
const content = XLSX.write(wb, { bookType: "xlsx", type: "array" });
return {
filename: `xhs-bloggers-${formatTimestamp(now())}.xls`,
filename: `xhs-bloggers-${formatTimestamp(now())}.xlsx`,
columns: fields,
headers,
content,
@ -643,9 +568,49 @@
}
}
// Browser-bundle URLs for SheetJS 0.18.5 hosted on three different CDNs.
// ensureXlsx() walks this list in order and stops at the first URL after
// which a usable XLSX global is present on `root`.
const XLSX_CDN_URLS = [
"https://cdn.jsdelivr.net/npm/xlsx@0.18.5/dist/xlsx.full.min.js",
"https://cdnjs.cloudflare.com/ajax/libs/xlsx/0.18.5/xlsx.full.min.js",
"https://cdn.bootcdn.net/ajax/libs/xlsx/0.18.5/xlsx.full.min.js",
];
// URL -> promise for script loads that are in flight or have succeeded.
// Failed loads are evicted so that a later call can try the same URL again.
const loadedScripts = new Map();

/**
 * Inject a `<script>` element for `url` and resolve once it has loaded.
 *
 * Concurrent and repeated calls for the same URL share one promise while the
 * load is pending or after it succeeded. A failed load is NOT cached: the
 * entry is dropped and the dead element removed, so a transient network error
 * does not permanently poison the URL.
 *
 * @param {string} url Absolute script URL.
 * @returns {Promise<void>} Resolves on the element's load event; rejects with
 *   an Error naming the URL on its error event.
 */
function loadScript(url) {
  const cached = loadedScripts.get(url);
  if (cached) {
    return cached;
  }

  const promise = new Promise((resolve, reject) => {
    const script = root.document.createElement("script");
    script.src = url;
    script.async = true;
    script.onload = () => resolve();
    script.onerror = () => {
      // Do not leave a dead element behind for every failed attempt.
      if (typeof script.remove === "function") {
        script.remove();
      }
      reject(new Error(`加载脚本失败:${url}`));
    };
    // `head` can be missing when the script runs very early in page load.
    const parent = root.document.head || root.document.documentElement;
    parent.appendChild(script);
  });

  loadedScripts.set(url, promise);
  // Evict on failure so the next call retries instead of replaying the cached
  // rejection. The identity check avoids deleting a newer attempt's entry.
  promise.catch(() => {
    if (loadedScripts.get(url) === promise) {
      loadedScripts.delete(url);
    }
  });
  return promise;
}
/**
 * Return the SheetJS global from `root`, loading it from a CDN if necessary.
 *
 * If a usable `root.XLSX` is already present it is returned immediately.
 * Otherwise each URL in `XLSX_CDN_URLS` is tried in order via `loadScript`
 * until one of them leaves a usable global behind.
 *
 * @returns {Promise<object>} The `root.XLSX` object.
 * @throws {Error} When no URL yields a usable global. The individual failures
 *   are available on `error.errors`, and the last one on `error.cause`.
 */
async function ensureXlsx() {
  // "Usable" means the pieces the exporters call are actually there.
  const isReady = () =>
    Boolean(root.XLSX && root.XLSX.utils && typeof root.XLSX.write === "function");

  if (isReady()) {
    return root.XLSX;
  }

  const failures = [];
  for (const url of XLSX_CDN_URLS) {
    try {
      await loadScript(url);
      if (isReady()) {
        return root.XLSX;
      }
      // The script loaded but did not define the global on `root`.
      failures.push(new Error(`脚本已加载但未找到 XLSX 全局对象:${url}`));
    } catch (error) {
      // Best effort: remember why this URL failed, then fall through to the next.
      failures.push(error);
    }
  }

  const failure = new Error("加载 SheetJS 失败,可能被网络或页面 CSP 限制。");
  failure.errors = failures;
  if (failures.length > 0) {
    // Assigned as a plain property so it also works on engines that ignore
    // the `cause` constructor option.
    failure.cause = failures[failures.length - 1];
  }
  throw failure;
}
function downloadFile(filename, content) {
const blob = new Blob([content], {
type: "application/vnd.ms-excel;charset=utf-8",
type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
});
const link = root.document.createElement("a");
const blobUrl = root.URL.createObjectURL(blob);