From 2a598d65fc6fe5fcc608abd9334aa901d9701663 Mon Sep 17 00:00:00 2001 From: wxs Date: Fri, 13 Mar 2026 13:08:30 +0800 Subject: [PATCH] feat(pugongying): export xlsx via sheetjs --- .gitignore | 2 + pugongying/package-lock.json | 118 ++++++++++++++++++ pugongying/package.json | 3 + pugongying/src/xhs-pgy-export-core.js | 35 +++++- pugongying/test/userscript.test.js | 10 +- pugongying/xhs-pgy-export.user.js | 167 ++++++++++---------------- 6 files changed, 226 insertions(+), 109 deletions(-) create mode 100644 .gitignore create mode 100644 pugongying/package-lock.json diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2fcecf3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +**/node_modules/ +**/.DS_Store diff --git a/pugongying/package-lock.json b/pugongying/package-lock.json new file mode 100644 index 0000000..d187a5b --- /dev/null +++ b/pugongying/package-lock.json @@ -0,0 +1,118 @@ +{ + "name": "browser-script", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "browser-script", + "version": "1.0.0", + "dependencies": { + "xlsx": "^0.18.5" + } + }, + "node_modules/adler-32": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/adler-32/-/adler-32-1.3.1.tgz", + "integrity": "sha512-ynZ4w/nUUv5rrsR8UUGoe1VC9hZj6V5hU9Qw1HlMDJGEJw5S7TfTErWTjMys6M7vr0YWcPqs3qAr4ss0nDfP+A==", + "license": "Apache-2.0", + "engines": { + "node": ">=0.8" + } + }, + "node_modules/cfb": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/cfb/-/cfb-1.2.2.tgz", + "integrity": "sha512-KfdUZsSOw19/ObEWasvBP/Ac4reZvAGauZhs6S/gqNhXhI7cKwvlH7ulj+dOEYnca4bm4SGo8C1bTAQvnTjgQA==", + "license": "Apache-2.0", + "dependencies": { + "adler-32": "~1.3.0", + "crc-32": "~1.2.0" + }, + "engines": { + "node": ">=0.8" + } + }, + "node_modules/codepage": { + "version": "1.15.0", + "resolved": "https://registry.npmjs.org/codepage/-/codepage-1.15.0.tgz", + "integrity": 
"sha512-3g6NUTPd/YtuuGrhMnOMRjFc+LJw/bnMp3+0r/Wcz3IXUuCosKRJvMphm5+Q+bvTVGcJJuRvVLuYba+WojaFaA==", + "license": "Apache-2.0", + "engines": { + "node": ">=0.8" + } + }, + "node_modules/crc-32": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/crc-32/-/crc-32-1.2.2.tgz", + "integrity": "sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ==", + "license": "Apache-2.0", + "bin": { + "crc32": "bin/crc32.njs" + }, + "engines": { + "node": ">=0.8" + } + }, + "node_modules/frac": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/frac/-/frac-1.1.2.tgz", + "integrity": "sha512-w/XBfkibaTl3YDqASwfDUqkna4Z2p9cFSr1aHDt0WoMTECnRfBOv2WArlZILlqgWlmdIlALXGpM2AOhEk5W3IA==", + "license": "Apache-2.0", + "engines": { + "node": ">=0.8" + } + }, + "node_modules/ssf": { + "version": "0.11.2", + "resolved": "https://registry.npmjs.org/ssf/-/ssf-0.11.2.tgz", + "integrity": "sha512-+idbmIXoYET47hH+d7dfm2epdOMUDjqcB4648sTZ+t2JwoyBFL/insLfB/racrDmsKB3diwsDA696pZMieAC5g==", + "license": "Apache-2.0", + "dependencies": { + "frac": "~1.1.2" + }, + "engines": { + "node": ">=0.8" + } + }, + "node_modules/wmf": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wmf/-/wmf-1.0.2.tgz", + "integrity": "sha512-/p9K7bEh0Dj6WbXg4JG0xvLQmIadrner1bi45VMJTfnbVHsc7yIajZyoSoK60/dtVBs12Fm6WkUI5/3WAVsNMw==", + "license": "Apache-2.0", + "engines": { + "node": ">=0.8" + } + }, + "node_modules/word": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/word/-/word-0.3.0.tgz", + "integrity": "sha512-OELeY0Q61OXpdUfTp+oweA/vtLVg5VDOXh+3he3PNzLGG/y0oylSOC1xRVj0+l4vQ3tj/bB1HVHv1ocXkQceFA==", + "license": "Apache-2.0", + "engines": { + "node": ">=0.8" + } + }, + "node_modules/xlsx": { + "version": "0.18.5", + "resolved": "https://registry.npmjs.org/xlsx/-/xlsx-0.18.5.tgz", + "integrity": "sha512-dmg3LCjBPHZnQp5/F/+nnTa+miPJxUXB6vtk42YjBBKayDNagxGEeIdWApkYPOf3Z3pm3k62Knjzp7lMeTEtFQ==", + "license": "Apache-2.0", + 
"dependencies": { + "adler-32": "~1.3.0", + "cfb": "~1.2.1", + "codepage": "~1.15.0", + "crc-32": "~1.2.1", + "ssf": "~0.11.2", + "wmf": "~1.0.1", + "word": "~0.3.0" + }, + "bin": { + "xlsx": "bin/xlsx.njs" + }, + "engines": { + "node": ">=0.8" + } + } + } +} diff --git a/pugongying/package.json b/pugongying/package.json index d8ba289..cc67fd1 100644 --- a/pugongying/package.json +++ b/pugongying/package.json @@ -2,6 +2,9 @@ "name": "browser-script", "version": "1.0.0", "private": true, + "dependencies": { + "xlsx": "^0.18.5" + }, "scripts": { "test": "node --test", "test:coverage": "node --test --experimental-test-coverage" diff --git a/pugongying/src/xhs-pgy-export-core.js b/pugongying/src/xhs-pgy-export-core.js index 90caf50..e1241ef 100644 --- a/pugongying/src/xhs-pgy-export-core.js +++ b/pugongying/src/xhs-pgy-export-core.js @@ -376,6 +376,36 @@ function buildCsvContent(config) { return `\uFEFF${[headerLine, ...bodyLines].join("\r\n")}`; } +function buildXlsxContent(config) { + // Lazy require so the rest of the module stays usable without deps (e.g. pure parsing tests). + // In this repo we install it via package.json. + // eslint-disable-next-line global-require, import/no-extraneous-dependencies + const XLSX = require("xlsx"); + + const sheetName = sanitizeSheetName(config.sheetName || "达人数据"); + const columns = Array.isArray(config.columns) ? config.columns : []; + const headers = + Array.isArray(config.headers) && config.headers.length === columns.length + ? config.headers + : columns; + const rows = Array.isArray(config.rows) ? config.rows : []; + + const aoa = [headers.slice()]; + for (const row of rows) { + aoa.push( + columns.map((column) => { + const value = row[column] === undefined ? 
"" : row[column]; + return normalizeScalar(value); + }), + ); + } + + const ws = XLSX.utils.aoa_to_sheet(aoa); + const wb = XLSX.utils.book_new(); + XLSX.utils.book_append_sheet(wb, ws, sheetName); + return XLSX.write(wb, { bookType: "xlsx", type: "buffer" }); +} + function formatTimestamp(date) { const safeDate = date instanceof Date ? date : new Date(); const parts = [ @@ -523,7 +553,7 @@ function createExportController(options) { const rows = buildExportRows(cachedRecords, fields); const headers = fields.map((field) => getFieldLabel(field)); - const content = buildSpreadsheetXml({ + const content = buildXlsxContent({ columns: fields, headers, rows, @@ -531,7 +561,7 @@ function createExportController(options) { }); return { - filename: `xhs-bloggers-${formatTimestamp(now())}.xls`, + filename: `xhs-bloggers-${formatTimestamp(now())}.xlsx`, columns: fields, headers, rows, @@ -555,6 +585,7 @@ module.exports = { buildCsvContent, buildFieldOptions, buildSpreadsheetXml, + buildXlsxContent, createExportController, extractBloggerId, fetchMergedBloggerRecord, diff --git a/pugongying/test/userscript.test.js b/pugongying/test/userscript.test.js index fbf673d..9302583 100644 --- a/pugongying/test/userscript.test.js +++ b/pugongying/test/userscript.test.js @@ -192,14 +192,12 @@ test("createExportController previews and exports creator data", async () => { ); const exported = controller.exportSheet(["id", "name", "metrics.fans"]); - assert.equal(exported.filename, "xhs-bloggers-20260312-160910.xls"); + assert.equal(exported.filename, "xhs-bloggers-20260312-160910.xlsx"); assert.equal(exported.rows.length, 2); assert.deepEqual(exported.headers, ["ID", "达人昵称", "粉丝数"]); - assert.match(exported.content, /<\?mso-application progid="Excel\.Sheet"\?>/); - assert.match(exported.content, //); - assert.match(exported.content, /达人昵称/); - assert.match(exported.content, /达人-08d5/); - assert.match(exported.content, /达人-3456/); + assert.ok(Buffer.isBuffer(exported.content)); + 
assert.equal(exported.content[0], 0x50); // P + assert.equal(exported.content[1], 0x4b); // K }); test("createExportController merges supplemental endpoint payloads into namespaced fields", async () => { diff --git a/pugongying/xhs-pgy-export.user.js b/pugongying/xhs-pgy-export.user.js index 36cddbb..8cb5f17 100644 --- a/pugongying/xhs-pgy-export.user.js +++ b/pugongying/xhs-pgy-export.user.js @@ -1,10 +1,11 @@ // ==UserScript== // @name 小红书蒲公英达人信息导出 // @namespace https://pgy.xiaohongshu.com/ -// @version 0.1.0 +// @version 0.1.1 // @description 输入达人主页链接或达人 ID,勾选字段后导出 Excel // @match https://pgy.xiaohongshu.com/* // @grant none +// @require https://cdn.jsdelivr.net/npm/xlsx@0.18.5/dist/xlsx.full.min.js // ==/UserScript== (function bootstrap(root, factory) { @@ -301,74 +302,6 @@ }); } - function escapeCsvValue(value) { - const text = normalizeScalar(value); - if (/["\n,\r]/.test(text)) { - return `"${text.replace(/"/g, '""')}"`; - } - return text; - } - - function buildCsvContent(config) { - const columns = Array.isArray(config.columns) ? config.columns : []; - const headers = - Array.isArray(config.headers) && config.headers.length === columns.length - ? config.headers - : columns; - const rows = Array.isArray(config.rows) ? config.rows : []; - const headerLine = headers.map(escapeCsvValue).join(","); - const bodyLines = rows.map((row) => - columns - .map((column) => escapeCsvValue(row[column] === undefined ? "" : row[column])) - .join(","), - ); - - return `\uFEFF${[headerLine, ...bodyLines].join("\r\n")}`; - } - - function buildSpreadsheetXml(config) { - const sheetName = typeof config.sheetName === "string" ? config.sheetName : "达人数据"; - const columns = Array.isArray(config.columns) ? config.columns : []; - const headers = - Array.isArray(config.headers) && config.headers.length === columns.length - ? config.headers - : columns; - const rows = Array.isArray(config.rows) ? 
config.rows : []; - const headerCells = columns - .map( - (column, index) => - `${escapeXml(headers[index] ?? column)}`, - ) - .join(""); - - const dataRows = rows - .map((row) => { - const cells = columns - .map((column) => { - const value = row[column] === undefined ? "" : row[column]; - return `${escapeXml(value)}`; - }) - .join(""); - return `${cells}`; - }) - .join(""); - - return ` - - - - - ${headerCells} - ${dataRows} -
-
-
`; - } - function formatTimestamp(date) { const safeDate = date instanceof Date ? date : new Date(); const parts = [ @@ -523,20 +456,24 @@ ? selectedFields : cachedFields.map((field) => field.path); - const rows = buildExportRows(cachedRecords, fields); const headers = fields.map((field) => getFieldLabel(field)); - const content = buildSpreadsheetXml({ - columns: fields, - headers, - rows, - sheetName: "达人数据", - }); + if (!root.XLSX) { + throw new Error("未加载 SheetJS,无法导出 xlsx。"); + } + + const aoa = [headers.slice()]; + for (const record of cachedRecords) { + aoa.push(fields.map((field) => record?.flattened?.[field] ?? "")); + } + const ws = root.XLSX.utils.aoa_to_sheet(aoa); + const wb = root.XLSX.utils.book_new(); + root.XLSX.utils.book_append_sheet(wb, ws, "达人数据"); + const content = root.XLSX.write(wb, { bookType: "xlsx", type: "array" }); return { - filename: `xhs-bloggers-${formatTimestamp(now())}.xls`, + filename: `xhs-bloggers-${formatTimestamp(now())}.xlsx`, columns: fields, headers, - rows, content, }; }, @@ -553,14 +490,6 @@ const headers = fields.map((field) => getFieldLabel(field)); const total = cachedRecords.length; - const headerCells = headers - .map((header) => `${escapeXml(header)}`) - .join(""); - const parts = [ - `\n\n\n \n \n ${headerCells}\n`, - ]; const report = (percentage, message) => { if (typeof onProgress !== "function") { @@ -569,21 +498,13 @@ onProgress(Math.max(0, Math.min(100, percentage)), message || ""); }; - report(0, "正在生成 Excel..."); + report(0, "正在生成 Excel(.xlsx)..."); + const aoa = [headers.slice()]; const yieldEvery = 50; for (let index = 0; index < total; index += 1) { const record = cachedRecords[index]; - const cells = fields - .map((field) => { - const value = - record && record.flattened && record.flattened[field] !== undefined - ? 
record.flattened[field] - : ""; - return `${escapeXml(value)}`; - }) - .join(""); - parts.push(` ${cells}\n`); + aoa.push(fields.map((field) => record?.flattened?.[field] ?? "")); const isLast = index === total - 1; if (isLast || (index + 1) % yieldEvery === 0) { @@ -593,11 +514,15 @@ } } - parts.push("
\n
\n
"); - const content = parts.join(""); + report(100, "正在打包 xlsx..."); + const XLSX = await ensureXlsx(); + const ws = XLSX.utils.aoa_to_sheet(aoa); + const wb = XLSX.utils.book_new(); + XLSX.utils.book_append_sheet(wb, ws, "达人数据"); + const content = XLSX.write(wb, { bookType: "xlsx", type: "array" }); return { - filename: `xhs-bloggers-${formatTimestamp(now())}.xls`, + filename: `xhs-bloggers-${formatTimestamp(now())}.xlsx`, columns: fields, headers, content, @@ -643,9 +568,49 @@ } } + const XLSX_CDN_URLS = [ + "https://cdn.jsdelivr.net/npm/xlsx@0.18.5/dist/xlsx.full.min.js", + "https://cdnjs.cloudflare.com/ajax/libs/xlsx/0.18.5/xlsx.full.min.js", + "https://cdn.bootcdn.net/ajax/libs/xlsx/0.18.5/xlsx.full.min.js", + ]; + const loadedScripts = new Map(); + + function loadScript(url) { + if (loadedScripts.has(url)) { + return loadedScripts.get(url); + } + const promise = new Promise((resolve, reject) => { + const script = root.document.createElement("script"); + script.src = url; + script.async = true; + script.onload = () => resolve(); + script.onerror = () => { loadedScripts.delete(url); reject(new Error(`加载脚本失败:${url}`)); }; + root.document.head.appendChild(script); + }); + loadedScripts.set(url, promise); + return promise; + } + + async function ensureXlsx() { + if (root.XLSX && root.XLSX.utils && typeof root.XLSX.write === "function") { + return root.XLSX; + } + for (const url of XLSX_CDN_URLS) { + try { + await loadScript(url); + if (root.XLSX && root.XLSX.utils && typeof root.XLSX.write === "function") { + return root.XLSX; + } + } catch (error) { + // try next url + } + } + throw new Error("加载 SheetJS 失败,可能被网络或页面 CSP 限制。"); + } + function downloadFile(filename, content) { const blob = new Blob([content], { - type: "application/vnd.ms-excel;charset=utf-8", + type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", }); const link = root.document.createElement("a"); const blobUrl = root.URL.createObjectURL(blob);