feat(pugongying): export xlsx via sheetjs
This commit is contained in:
parent
2ddd4bb5ca
commit
2a598d65fc
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
**/node_modules/
|
||||
**/.DS_Store
|
||||
118
pugongying/package-lock.json
generated
Normal file
118
pugongying/package-lock.json
generated
Normal file
@ -0,0 +1,118 @@
|
||||
{
|
||||
"name": "browser-script",
|
||||
"version": "1.0.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "browser-script",
|
||||
"version": "1.0.0",
|
||||
"dependencies": {
|
||||
"xlsx": "^0.18.5"
|
||||
}
|
||||
},
|
||||
"node_modules/adler-32": {
|
||||
"version": "1.3.1",
|
||||
"resolved": "https://registry.npmjs.org/adler-32/-/adler-32-1.3.1.tgz",
|
||||
"integrity": "sha512-ynZ4w/nUUv5rrsR8UUGoe1VC9hZj6V5hU9Qw1HlMDJGEJw5S7TfTErWTjMys6M7vr0YWcPqs3qAr4ss0nDfP+A==",
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">=0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/cfb": {
|
||||
"version": "1.2.2",
|
||||
"resolved": "https://registry.npmjs.org/cfb/-/cfb-1.2.2.tgz",
|
||||
"integrity": "sha512-KfdUZsSOw19/ObEWasvBP/Ac4reZvAGauZhs6S/gqNhXhI7cKwvlH7ulj+dOEYnca4bm4SGo8C1bTAQvnTjgQA==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"adler-32": "~1.3.0",
|
||||
"crc-32": "~1.2.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/codepage": {
|
||||
"version": "1.15.0",
|
||||
"resolved": "https://registry.npmjs.org/codepage/-/codepage-1.15.0.tgz",
|
||||
"integrity": "sha512-3g6NUTPd/YtuuGrhMnOMRjFc+LJw/bnMp3+0r/Wcz3IXUuCosKRJvMphm5+Q+bvTVGcJJuRvVLuYba+WojaFaA==",
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">=0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/crc-32": {
|
||||
"version": "1.2.2",
|
||||
"resolved": "https://registry.npmjs.org/crc-32/-/crc-32-1.2.2.tgz",
|
||||
"integrity": "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ==",
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
"crc32": "bin/crc32.njs"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/frac": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/frac/-/frac-1.1.2.tgz",
|
||||
"integrity": "sha512-w/XBfkibaTl3YDqASwfDUqkna4Z2p9cFSr1aHDt0WoMTECnRfBOv2WArlZILlqgWlmdIlALXGpM2AOhEk5W3IA==",
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">=0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/ssf": {
|
||||
"version": "0.11.2",
|
||||
"resolved": "https://registry.npmjs.org/ssf/-/ssf-0.11.2.tgz",
|
||||
"integrity": "sha512-+idbmIXoYET47hH+d7dfm2epdOMUDjqcB4648sTZ+t2JwoyBFL/insLfB/racrDmsKB3diwsDA696pZMieAC5g==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"frac": "~1.1.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/wmf": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/wmf/-/wmf-1.0.2.tgz",
|
||||
"integrity": "sha512-/p9K7bEh0Dj6WbXg4JG0xvLQmIadrner1bi45VMJTfnbVHsc7yIajZyoSoK60/dtVBs12Fm6WkUI5/3WAVsNMw==",
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">=0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/word": {
|
||||
"version": "0.3.0",
|
||||
"resolved": "https://registry.npmjs.org/word/-/word-0.3.0.tgz",
|
||||
"integrity": "sha512-OELeY0Q61OXpdUfTp+oweA/vtLVg5VDOXh+3he3PNzLGG/y0oylSOC1xRVj0+l4vQ3tj/bB1HVHv1ocXkQceFA==",
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">=0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/xlsx": {
|
||||
"version": "0.18.5",
|
||||
"resolved": "https://registry.npmjs.org/xlsx/-/xlsx-0.18.5.tgz",
|
||||
"integrity": "sha512-dmg3LCjBPHZnQp5/F/+nnTa+miPJxUXB6vtk42YjBBKayDNagxGEeIdWApkYPOf3Z3pm3k62Knjzp7lMeTEtFQ==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"adler-32": "~1.3.0",
|
||||
"cfb": "~1.2.1",
|
||||
"codepage": "~1.15.0",
|
||||
"crc-32": "~1.2.1",
|
||||
"ssf": "~0.11.2",
|
||||
"wmf": "~1.0.1",
|
||||
"word": "~0.3.0"
|
||||
},
|
||||
"bin": {
|
||||
"xlsx": "bin/xlsx.njs"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=0.8"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2,6 +2,9 @@
|
||||
"name": "browser-script",
|
||||
"version": "1.0.0",
|
||||
"private": true,
|
||||
"dependencies": {
|
||||
"xlsx": "^0.18.5"
|
||||
},
|
||||
"scripts": {
|
||||
"test": "node --test",
|
||||
"test:coverage": "node --test --experimental-test-coverage"
|
||||
|
||||
@ -376,6 +376,36 @@ function buildCsvContent(config) {
|
||||
return `\uFEFF${[headerLine, ...bodyLines].join("\r\n")}`;
|
||||
}
|
||||
|
||||
/**
 * Serialize tabular export data into an .xlsx workbook via SheetJS.
 *
 * @param {object} config
 * @param {Array} [config.columns] - Row keys, one per output column; anything that is
 *   not an array is treated as an empty list.
 * @param {Array} [config.headers] - Header labels; only used when their count matches
 *   `columns`, otherwise the column keys double as the header row.
 * @param {Array} [config.rows] - Records indexed by column key; anything that is not
 *   an array is treated as an empty list.
 * @param {string} [config.sheetName] - Worksheet name; "达人数据" is used when falsy.
 * @returns {*} The result of `XLSX.write` with `{ bookType: "xlsx", type: "buffer" }`.
 */
function buildXlsxContent(config) {
  // Required lazily so the rest of this module keeps working when the dependency
  // is absent (e.g. pure parsing tests). The repo installs it through package.json.
  // eslint-disable-next-line global-require, import/no-extraneous-dependencies
  const XLSX = require("xlsx");

  const sheetName = sanitizeSheetName(config.sheetName || "达人数据");
  const columns = Array.isArray(config.columns) ? config.columns : [];

  // Custom labels are only trusted when there is exactly one per column.
  let headers = columns;
  if (Array.isArray(config.headers) && config.headers.length === columns.length) {
    headers = config.headers;
  }

  const rows = Array.isArray(config.rows) ? config.rows : [];

  // A missing (undefined) field becomes an empty cell; everything goes through normalizeScalar.
  const toCells = (row) =>
    columns.map((column) => {
      const raw = row[column];
      return normalizeScalar(raw === undefined ? "" : raw);
    });

  // Array-of-arrays layout: header row first, then one entry per record.
  const aoa = [headers.slice(), ...Array.from(rows, toCells)];

  const workbook = XLSX.utils.book_new();
  const worksheet = XLSX.utils.aoa_to_sheet(aoa);
  XLSX.utils.book_append_sheet(workbook, worksheet, sheetName);
  return XLSX.write(workbook, { bookType: "xlsx", type: "buffer" });
}
|
||||
|
||||
function formatTimestamp(date) {
|
||||
const safeDate = date instanceof Date ? date : new Date();
|
||||
const parts = [
|
||||
@ -523,7 +553,7 @@ function createExportController(options) {
|
||||
|
||||
const rows = buildExportRows(cachedRecords, fields);
|
||||
const headers = fields.map((field) => getFieldLabel(field));
|
||||
const content = buildSpreadsheetXml({
|
||||
const content = buildXlsxContent({
|
||||
columns: fields,
|
||||
headers,
|
||||
rows,
|
||||
@ -531,7 +561,7 @@ function createExportController(options) {
|
||||
});
|
||||
|
||||
return {
|
||||
filename: `xhs-bloggers-${formatTimestamp(now())}.xls`,
|
||||
filename: `xhs-bloggers-${formatTimestamp(now())}.xlsx`,
|
||||
columns: fields,
|
||||
headers,
|
||||
rows,
|
||||
@ -555,6 +585,7 @@ module.exports = {
|
||||
buildCsvContent,
|
||||
buildFieldOptions,
|
||||
buildSpreadsheetXml,
|
||||
buildXlsxContent,
|
||||
createExportController,
|
||||
extractBloggerId,
|
||||
fetchMergedBloggerRecord,
|
||||
|
||||
@ -192,14 +192,12 @@ test("createExportController previews and exports creator data", async () => {
|
||||
);
|
||||
|
||||
const exported = controller.exportSheet(["id", "name", "metrics.fans"]);
|
||||
assert.equal(exported.filename, "xhs-bloggers-20260312-160910.xls");
|
||||
assert.equal(exported.filename, "xhs-bloggers-20260312-160910.xlsx");
|
||||
assert.equal(exported.rows.length, 2);
|
||||
assert.deepEqual(exported.headers, ["ID", "达人昵称", "粉丝数"]);
|
||||
assert.match(exported.content, /<\?mso-application progid="Excel\.Sheet"\?>/);
|
||||
assert.match(exported.content, /<Worksheet ss:Name="达人数据">/);
|
||||
assert.match(exported.content, /达人昵称/);
|
||||
assert.match(exported.content, /达人-08d5/);
|
||||
assert.match(exported.content, /达人-3456/);
|
||||
assert.ok(Buffer.isBuffer(exported.content));
|
||||
assert.equal(exported.content[0], 0x50); // P
|
||||
assert.equal(exported.content[1], 0x4b); // K
|
||||
});
|
||||
|
||||
test("createExportController merges supplemental endpoint payloads into namespaced fields", async () => {
|
||||
|
||||
@ -1,10 +1,11 @@
|
||||
// ==UserScript==
|
||||
// @name 小红书蒲公英达人信息导出
|
||||
// @namespace https://pgy.xiaohongshu.com/
|
||||
// @version 0.1.0
|
||||
// @version 0.1.1
|
||||
// @description 输入达人主页链接或达人 ID,勾选字段后导出 Excel
|
||||
// @match https://pgy.xiaohongshu.com/*
|
||||
// @grant none
|
||||
// @require https://cdn.jsdelivr.net/npm/xlsx@0.18.5/dist/xlsx.full.min.js
|
||||
// ==/UserScript==
|
||||
|
||||
(function bootstrap(root, factory) {
|
||||
@ -301,74 +302,6 @@
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Turn a value into a single CSV field.
 *
 * The value is first passed through `normalizeScalar`; if the result contains a double
 * quote, comma, CR or LF it is wrapped in double quotes with embedded quotes doubled.
 *
 * @param {*} value - Raw cell value.
 * @returns {string} The CSV-safe field text.
 */
function escapeCsvValue(value) {
  const cell = normalizeScalar(value);
  const needsQuoting = /["\n,\r]/.test(cell);
  return needsQuoting ? `"${cell.replace(/"/g, '""')}"` : cell;
}
|
||||
|
||||
/**
 * Render export data as CSV text prefixed with a UTF-8 byte-order mark.
 *
 * @param {object} config
 * @param {Array} [config.columns] - Row keys, one per output column (non-arrays count as empty).
 * @param {Array} [config.headers] - Header labels; used only when their count matches
 *   `columns`, otherwise the column keys are written as the header line.
 * @param {Array} [config.rows] - Records indexed by column key (non-arrays count as empty).
 * @returns {string} BOM + header line + one line per row, joined with CRLF.
 */
function buildCsvContent(config) {
  const columns = Array.isArray(config.columns) ? config.columns : [];

  let headers = columns;
  if (Array.isArray(config.headers) && config.headers.length === columns.length) {
    headers = config.headers;
  }

  const rows = Array.isArray(config.rows) ? config.rows : [];

  // Undefined fields are emitted as empty strings rather than the text "undefined".
  const toLine = (row) =>
    columns
      .map((column) => escapeCsvValue(row[column] === undefined ? "" : row[column]))
      .join(",");

  const headerLine = headers.map(escapeCsvValue).join(",");
  const bodyLines = rows.map((row) => toLine(row));
  const csvText = [headerLine, ...bodyLines].join("\r\n");

  return `\uFEFF${csvText}`;
}
|
||||
|
||||
/**
 * Render export data as an XML spreadsheet string (a `<Workbook>` in the
 * `urn:schemas-microsoft-com:office:spreadsheet` namespace, tagged with the
 * `<?mso-application progid="Excel.Sheet"?>` processing instruction).
 *
 * Every cell is written with `ss:Type="String"`; the sheet name, header labels and
 * cell values are all passed through `escapeXml` (defined elsewhere in this file).
 *
 * @param {object} config
 * @param {string} [config.sheetName] - Worksheet name; "达人数据" is used when not a string.
 * @param {Array} [config.columns] - Row keys, one per output column (non-arrays count as empty).
 * @param {Array} [config.headers] - Header labels; used only when their count matches
 *   `columns`, otherwise the column keys are used.
 * @param {Array} [config.rows] - Records indexed by column key (non-arrays count as empty).
 * @returns {string} The complete workbook XML.
 */
function buildSpreadsheetXml(config) {
|
||||
const sheetName = typeof config.sheetName === "string" ? config.sheetName : "达人数据";
|
||||
const columns = Array.isArray(config.columns) ? config.columns : [];
|
||||
const headers =
|
||||
Array.isArray(config.headers) && config.headers.length === columns.length
|
||||
? config.headers
|
||||
: columns;
|
||||
const rows = Array.isArray(config.rows) ? config.rows : [];
|
||||
// Header row: one string cell per column; a null/undefined label falls back to the column key.
const headerCells = columns
|
||||
.map(
|
||||
(column, index) =>
|
||||
`<Cell><Data ss:Type="String">${escapeXml(headers[index] ?? column)}</Data></Cell>`,
|
||||
)
|
||||
.join("");
|
||||
|
||||
// Data rows: one <Row> per record; an undefined field is written as an empty string.
const dataRows = rows
|
||||
.map((row) => {
|
||||
const cells = columns
|
||||
.map((column) => {
|
||||
const value = row[column] === undefined ? "" : row[column];
|
||||
return `<Cell><Data ss:Type="String">${escapeXml(value)}</Data></Cell>`;
|
||||
})
|
||||
.join("");
|
||||
return `<Row>${cells}</Row>`;
|
||||
})
|
||||
.join("");
|
||||
|
||||
return `<?xml version="1.0" encoding="UTF-8"?>
|
||||
<?mso-application progid="Excel.Sheet"?>
|
||||
<Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet"
|
||||
xmlns:o="urn:schemas-microsoft-com:office:office"
|
||||
xmlns:x="urn:schemas-microsoft-com:office:excel"
|
||||
xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"
|
||||
xmlns:html="http://www.w3.org/TR/REC-html40">
|
||||
<Worksheet ss:Name="${escapeXml(sheetName)}">
|
||||
<Table>
|
||||
<Row>${headerCells}</Row>
|
||||
${dataRows}
|
||||
</Table>
|
||||
</Worksheet>
|
||||
</Workbook>`;
|
||||
}
|
||||
|
||||
function formatTimestamp(date) {
|
||||
const safeDate = date instanceof Date ? date : new Date();
|
||||
const parts = [
|
||||
@ -523,20 +456,24 @@
|
||||
? selectedFields
|
||||
: cachedFields.map((field) => field.path);
|
||||
|
||||
const rows = buildExportRows(cachedRecords, fields);
|
||||
const headers = fields.map((field) => getFieldLabel(field));
|
||||
const content = buildSpreadsheetXml({
|
||||
columns: fields,
|
||||
headers,
|
||||
rows,
|
||||
sheetName: "达人数据",
|
||||
});
|
||||
if (!root.XLSX) {
|
||||
throw new Error("未加载 SheetJS,无法导出 xlsx。");
|
||||
}
|
||||
|
||||
const aoa = [headers.slice()];
|
||||
for (const record of cachedRecords) {
|
||||
aoa.push(fields.map((field) => record.flattened[field] || ""));
|
||||
}
|
||||
const ws = root.XLSX.utils.aoa_to_sheet(aoa);
|
||||
const wb = root.XLSX.utils.book_new();
|
||||
root.XLSX.utils.book_append_sheet(wb, ws, "达人数据");
|
||||
const content = root.XLSX.write(wb, { bookType: "xlsx", type: "array" });
|
||||
|
||||
return {
|
||||
filename: `xhs-bloggers-${formatTimestamp(now())}.xls`,
|
||||
filename: `xhs-bloggers-${formatTimestamp(now())}.xlsx`,
|
||||
columns: fields,
|
||||
headers,
|
||||
rows,
|
||||
content,
|
||||
};
|
||||
},
|
||||
@ -553,14 +490,6 @@
|
||||
|
||||
const headers = fields.map((field) => getFieldLabel(field));
|
||||
const total = cachedRecords.length;
|
||||
const headerCells = headers
|
||||
.map((header) => `<Cell><Data ss:Type="String">${escapeXml(header)}</Data></Cell>`)
|
||||
.join("");
|
||||
const parts = [
|
||||
`<?xml version="1.0" encoding="UTF-8"?>\n<?mso-application progid="Excel.Sheet"?>\n<Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet"\n xmlns:o="urn:schemas-microsoft-com:office:office"\n xmlns:x="urn:schemas-microsoft-com:office:excel"\n xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"\n xmlns:html="http://www.w3.org/TR/REC-html40">\n <Worksheet ss:Name="${escapeXml(
|
||||
"达人数据",
|
||||
)}">\n <Table>\n <Row>${headerCells}</Row>\n`,
|
||||
];
|
||||
|
||||
const report = (percentage, message) => {
|
||||
if (typeof onProgress !== "function") {
|
||||
@ -569,21 +498,13 @@
|
||||
onProgress(Math.max(0, Math.min(100, percentage)), message || "");
|
||||
};
|
||||
|
||||
report(0, "正在生成 Excel...");
|
||||
report(0, "正在生成 Excel(.xlsx)...");
|
||||
const aoa = [headers.slice()];
|
||||
|
||||
const yieldEvery = 50;
|
||||
for (let index = 0; index < total; index += 1) {
|
||||
const record = cachedRecords[index];
|
||||
const cells = fields
|
||||
.map((field) => {
|
||||
const value =
|
||||
record && record.flattened && record.flattened[field] !== undefined
|
||||
? record.flattened[field]
|
||||
: "";
|
||||
return `<Cell><Data ss:Type="String">${escapeXml(value)}</Data></Cell>`;
|
||||
})
|
||||
.join("");
|
||||
parts.push(` <Row>${cells}</Row>\n`);
|
||||
aoa.push(fields.map((field) => record.flattened[field] || ""));
|
||||
|
||||
const isLast = index === total - 1;
|
||||
if (isLast || (index + 1) % yieldEvery === 0) {
|
||||
@ -593,11 +514,15 @@
|
||||
}
|
||||
}
|
||||
|
||||
parts.push(" </Table>\n </Worksheet>\n</Workbook>");
|
||||
const content = parts.join("");
|
||||
report(100, "正在打包 xlsx...");
|
||||
const XLSX = await ensureXlsx();
|
||||
const ws = XLSX.utils.aoa_to_sheet(aoa);
|
||||
const wb = XLSX.utils.book_new();
|
||||
XLSX.utils.book_append_sheet(wb, ws, "达人数据");
|
||||
const content = XLSX.write(wb, { bookType: "xlsx", type: "array" });
|
||||
|
||||
return {
|
||||
filename: `xhs-bloggers-${formatTimestamp(now())}.xls`,
|
||||
filename: `xhs-bloggers-${formatTimestamp(now())}.xlsx`,
|
||||
columns: fields,
|
||||
headers,
|
||||
content,
|
||||
@ -643,9 +568,49 @@
|
||||
}
|
||||
}
|
||||
|
||||
// Locations of the SheetJS 0.18.5 browser bundle; ensureXlsx() tries them in this order
// and stops at the first one that leaves a usable XLSX object on `root`.
const XLSX_CDN_URLS = [
|
||||
"https://cdn.jsdelivr.net/npm/xlsx@0.18.5/dist/xlsx.full.min.js",
|
||||
"https://cdnjs.cloudflare.com/ajax/libs/xlsx/0.18.5/xlsx.full.min.js",
|
||||
"https://cdn.bootcdn.net/ajax/libs/xlsx/0.18.5/xlsx.full.min.js",
|
||||
];
|
||||
// Maps a script URL to the promise loadScript() created for it.
const loadedScripts = new Map();
|
||||
|
||||
/**
 * Inject a `<script src=url>` element into the page head and wait for it to load.
 *
 * Calls for the same URL share one promise while the load is pending or after it has
 * succeeded. A failed load is NOT kept in the cache: the entry is dropped (and the
 * dead element removed) so that a later call performs a fresh attempt instead of
 * replaying the old rejection forever.
 *
 * @param {string} url - Absolute URL of the script to load.
 * @returns {Promise<void>} Resolves on the element's `load` event; rejects with an
 *   `Error` ("加载脚本失败:<url>") on its `error` event.
 */
function loadScript(url) {
  if (loadedScripts.has(url)) {
    return loadedScripts.get(url);
  }

  const promise = new Promise((resolve, reject) => {
    const script = root.document.createElement("script");
    script.src = url;
    script.async = true;
    script.onload = () => resolve();
    script.onerror = () => {
      // Remove the failed element so retries do not pile up dead <script> nodes.
      if (typeof script.remove === "function") {
        script.remove();
      }
      reject(new Error(`加载脚本失败:${url}`));
    };
    root.document.head.appendChild(script);
  });

  loadedScripts.set(url, promise);

  // Evict on any rejection (network error or a synchronous DOM failure above) so the
  // next call retries. The identity check avoids evicting a newer attempt's entry.
  promise.catch(() => {
    if (loadedScripts.get(url) === promise) {
      loadedScripts.delete(url);
    }
  });

  return promise;
}
|
||||
|
||||
/**
 * Return the SheetJS object from `root.XLSX`, loading it from a CDN when necessary.
 *
 * If `root.XLSX` already exposes `utils` and a `write` function it is returned
 * immediately. Otherwise each entry of `XLSX_CDN_URLS` is loaded in order via
 * `loadScript` until one of them makes the library available.
 *
 * @returns {Promise<object>} The usable `root.XLSX` object.
 * @throws {Error} When no URL yields a usable library. The most recent underlying
 *   failure (if any) is attached as `error.cause` for diagnosis.
 */
async function ensureXlsx() {
  const isReady = () =>
    Boolean(root.XLSX && root.XLSX.utils && typeof root.XLSX.write === "function");

  if (isReady()) {
    return root.XLSX;
  }

  let lastError;
  for (const url of XLSX_CDN_URLS) {
    try {
      await loadScript(url);
      if (isReady()) {
        return root.XLSX;
      }
    } catch (error) {
      // Falling through to the next mirror is intentional; keep the reason so the
      // final error is not a dead end when every mirror fails.
      lastError = error;
    }
  }

  const failure = new Error("加载 SheetJS 失败,可能被网络或页面 CSP 限制。");
  if (lastError !== undefined) {
    failure.cause = lastError;
  }
  throw failure;
}
|
||||
|
||||
function downloadFile(filename, content) {
|
||||
const blob = new Blob([content], {
|
||||
type: "application/vnd.ms-excel;charset=utf-8",
|
||||
type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
});
|
||||
const link = root.document.createElement("a");
|
||||
const blobUrl = root.URL.createObjectURL(blob);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user