From 4800a375eea08369eebde5d9e86785761c625bf6 Mon Sep 17 00:00:00 2001 From: renzhiye Date: Thu, 2 Apr 2026 20:09:15 +0800 Subject: [PATCH] feat(api): add jd live session previews --- TODO.md | 101 ++++++ apps/api/src/platforms/jd/live-session.ts | 372 ++++++++++++++++++++++ apps/api/src/platforms/jd/parsers.test.ts | 151 +++++++++ apps/api/src/platforms/jd/parsers.ts | 361 +++++++++++++++++++++ apps/api/src/platforms/jd/types.ts | 93 ++++++ apps/api/src/platforms/jd/utils.ts | 115 +++++++ apps/api/src/server.jd-live.test.ts | 288 +++++++++++++++++ apps/api/src/server.ts | 109 ++++++- docs/CrawlerFeasibility.md | 7 +- 9 files changed, 1594 insertions(+), 3 deletions(-) create mode 100644 TODO.md create mode 100644 apps/api/src/platforms/jd/live-session.ts create mode 100644 apps/api/src/platforms/jd/parsers.test.ts create mode 100644 apps/api/src/platforms/jd/parsers.ts create mode 100644 apps/api/src/platforms/jd/types.ts create mode 100644 apps/api/src/platforms/jd/utils.ts create mode 100644 apps/api/src/server.jd-live.test.ts diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..6b1d2c4 --- /dev/null +++ b/TODO.md @@ -0,0 +1,101 @@ +# TODO + +- 更新时间:2026-04-02 +- 进度基线:2026-04-02 已完成一次 MVP 收敛;`npm run test`、`npm run typecheck` 通过;Web/API 实机流程已验证;JD 实时会话导入与 `search/detail/reviews` preview 已实机验证 +- 关联文档: + - `docs/tasks.md` + - `docs/DevelopmentPlan.md` + - `docs/tdd.md` + +## 维护约定 + +- 已完成任务统一使用 `- [x]` +- 未完成任务统一使用 `- [ ]` +- 进行中任务使用 `- [ ] ...(进行中)` +- 阻塞任务使用 `- [ ] ...(阻塞:原因)` +- 任务编号必须与 `docs/tasks.md` 对齐;若任务拆分、合并或改号,两个文件必须同步更新 + +## 当前主线 + +- [x] `S1-06` 会话中心 v1 与全局会话准备后端入口落地(MVP mock 版,支持 24h 会话、清理与回跳) +- [x] `S1-07` 新建任务页与全局会话准备入口落地 +- [x] `S2-01` 首个平台预检查与搜索适配器落地(MVP mock 版) +- [x] `S2-05` 标准化 v1 与最小报告快照落地(规则版) +- [ ] `S2-06` 单平台执行页闭环与回归包落地(进行中:闭环已可演示,回归包与真实异步执行待补) +- [ ] `S3-01` 第二平台 `precheck/search/detail/reviews` 适配器落地(进行中:当前双平台仍以 mock 适配为主) +- [ ] `S3-03` 阻塞恢复与 `L3 Browser Recovery` 落地(进行中:恢复页与重试链路已通,真实远程浏览器接管待补) +- [ ] `S4-02` AI 结构化报告生成与版本规则落地(进行中:版本规则已落地,真实 AI 生成待接入) +- [ ] `S4-05` 留存、删除 API 与联动清理链路落地(进行中:删除 API 与 30/90 天本地清理作业已落地,对象存储联动待补) +- [ ] `S4-06` 完整可观测性与审计日志落地(进行中:overview / audit 已有,完整指标体系待补) + +## 阶段快照 + +- [ ] `S0` 双平台能力矩阵、fixture/HAR、PoC 验证与 `strategy_attempts` 口径仍未冻结(进行中) +- [ ] `S1` 本地 JSON 持久化、API/BFF、会话准备、新建任务页与状态机骨架已可用,但数据库、队列、真实 `SSE` 仍未完成(进行中) +- [ ] `S2` 单平台最小闭环和最小报告已可演示,JD `search/detail/reviews` 实时 preview 已验证,但任务执行与标准化主链仍以 mock 数据为主(进行中) +- [ ] `S3` 双平台候选确认、执行控制台、恢复页与平台级重试已可用,但第二平台真实适配、`L2` 模板刷新与真实 `L3` 恢复未完成(进行中) +- [ ] `S4` 报告版本规则、报告页、历史任务页、版本切换、删除入口与观测概览已落地,但完整聚合、真实 AI、对象存储联动与完整审计仍未完成(进行中) +- [ ] `S5` 稳定性、性能、UAT、部署与发布准备尚未进入实施(未开始) + +## `S0` + +- [ ] `S0-01` 冻结双平台能力矩阵(未开始) +- [ ] `S0-02` 产出双平台首批 fixture 与 HAR 样本(未开始) +- [ ] `S0-03` 验证服务端受控浏览器与会话快照 PoC(进行中) +- [ ] `S0-04` 验证至少一个平台的非浏览器主路径 PoC(进行中:JD 已完成授权会话下 `search/detail/reviews` 实时 API 预览验证,待补模板刷新与量化口径) +- [x] `S0-05` 搭建 Monorepo 与基础工程骨架 +- [ ] `S0-06` 冻结 Phase 0 量化评分表、`strategy_attempts` 记录格式与进入开发门槛(未开始) + +## `S1` + +- [x] `S1-01` 共享领域模型与枚举包落地 +- [ ] `S1-02` 数据库、事件日志与对象存储模型落地(进行中:MVP 先落本地 JSON 持久化,正式数据库与对象存储待补) +- [ ] `S1-03` 任务编排、事件持久化与状态机骨架落地(进行中:状态机、事件日志、平台级重试已可用,队列化执行待补) +- [ ] `S1-04` API / BFF、平台就绪摘要与 `SSE` 基础接口落地(进行中:REST/BFF 已可用,`SSE` 仍是最小 snapshot 形态) +- [x] `S1-05` Web 工作台基础壳层与核心路由落地 +- [x] `S1-06` 会话中心 v1 与全局会话准备后端入口落地(MVP mock 版) +- [x] `S1-07` 新建任务页与全局会话准备入口落地 +- [ ] `S1-08` TDD 与 CI 基础链路落地(进行中) + +## `S2` + +- [x] `S2-01` 首个平台预检查与搜索适配器落地(MVP mock 版) +- [x] `S2-02` 候选确认页与确认 API 落地 +- [ ] `S2-03` 单平台商品详情抓取链路落地(进行中:JD live detail preview 已接入真实 `pc_detailpage_wareBusiness`,会话导入与解析已验证,待纳入任务执行与标准化主链) +- [ ] `S2-04` 单平台评论采集与抽样链路落地(进行中:JD live reviews preview 已接入真实 `getLegoWareDetailComment`,分页参数改写与解析已验证,待纳入任务执行与抽样主链) +- [x] `S2-05` 标准化 v1 与最小报告快照落地(规则版) +- [ ] `S2-06` 单平台执行页闭环与回归包落地(进行中:新建 -> 确认 -> 执行 -> 报告已打通) + +## `S3` + +- [ ] `S3-01` 第二平台 `precheck/search/detail/reviews` 适配器落地(进行中) +- [ ] `S3-02` 模板刷新与 `L2` 路径落地(未开始) +- [ ] `S3-03` 阻塞恢复与 `L3 Browser Recovery` 落地(进行中) +- [ ] `S3-04` 双平台候选确认与执行控制台落地(进行中:页面与状态展示已具备,真实并发执行待补) +- [x] `S3-05` `PartialCompleted`、`Blocked`、`Failed` 汇总规则落地 +- [ ] `S3-06` 双平台主回归包落地(未开始) + +## `S4` + +- [ ] `S4-01` 完整标准化与三级聚合落地(进行中) +- [ ] `S4-02` AI 结构化报告生成与版本规则落地(进行中) +- [ ] `S4-03` 报告页、证据抽屉与质量标记落地(进行中:报告页、质量标记与证据索引已落地,证据抽屉待补) +- [x] `S4-04` 历史任务页、版本切换与删除入口落地 +- [ ] `S4-05` 留存、删除 API 与联动清理链路落地(进行中:删除 API 与 30/90 天本地清理作业已落地,对象存储联动待补) +- [ ] `S4-06` 完整可观测性与审计日志落地(进行中) + +## `S5` + +- [ ] `S5-01` 平台级定向重试稳定化(进行中) +- [ ] `S5-02` 性能与成本优化(未开始) +- [ ] `S5-03` UAT 与试运行任务集执行(未开始) +- [ ] `S5-04` 部署、值守、排障与热修手册落地(未开始) +- [ ] `S5-05` 最终验收与文档同步收口(未开始) + +## 横向任务 + +- [ ] `X-01` 上下游文档变更同步(进行中) +- [ ] `X-02` 安全与合规检查(未开始) +- [ ] `X-03` 测试资产维护(进行中) +- [ ] `X-04` 设计一致性与可访问性检查(进行中) +- [ ] `X-05` 观测指标复盘(未开始) diff --git a/apps/api/src/platforms/jd/live-session.ts b/apps/api/src/platforms/jd/live-session.ts new file mode 100644 index 0000000..afcf321 --- /dev/null +++ b/apps/api/src/platforms/jd/live-session.ts @@ -0,0 +1,372 @@ +import { + parseJdDetailApiResponse, + parseJdReviewsApiResponse, + parseJdSearchApiResponse, + parseJdSearchHtml +} from "./parsers"; +import type { + JdDetailPreviewResult, + JdLiveService, + JdLiveSessionInput, + JdLiveSessionSummary, + JdReviewsPreviewResult, + JdSearchMode, + JdSearchPreviewResult, + JdTemplateSummary +} from "./types"; +import { firstString, readQueryBody, withUpdatedQueryBody } from "./utils"; + +const DEFAULT_JD_USER_AGENT = + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " + + "(KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"; + +type StoredJdLiveSession = { + cookieHeader: string; + importedAt: string; + userAgent: string; + searchApiTemplateUrl?: string | undefined; + detailTemplateUrl?: string | undefined; + reviewsTemplateUrl?: string | undefined; + searchReferer?: string | undefined; + detailReferer?: string | undefined; +}; + +class JdLiveError extends Error { + constructor( + message: string, + readonly statusCode: number = 400 + ) { + super(message); + this.name = "JdLiveError"; + } +} + +function nowIso(): string { + return new Date().toISOString(); +} + +function readEnvSession(): StoredJdLiveSession | null { + const cookieHeader = process.env.JD_COOKIE_HEADER?.trim(); + if (!cookieHeader) { + return null; + } + + const searchApiTemplateUrl = process.env.JD_SEARCH_API_TEMPLATE_URL?.trim(); + const detailTemplateUrl = process.env.JD_DETAIL_TEMPLATE_URL?.trim(); + const reviewsTemplateUrl = process.env.JD_REVIEWS_TEMPLATE_URL?.trim(); + const searchReferer = process.env.JD_SEARCH_REFERER?.trim(); + const detailReferer = process.env.JD_DETAIL_REFERER?.trim(); + + return { + cookieHeader, + importedAt: nowIso(), + userAgent: process.env.JD_USER_AGENT?.trim() || DEFAULT_JD_USER_AGENT, + ...(searchApiTemplateUrl ? { searchApiTemplateUrl } : {}), + ...(detailTemplateUrl ? { detailTemplateUrl } : {}), + ...(reviewsTemplateUrl ? { reviewsTemplateUrl } : {}), + ...(searchReferer ? { searchReferer } : {}), + ...(detailReferer ? { detailReferer } : {}) + }; +} + +function requireNonEmptyCookie(cookieHeader: string): string { + const normalized = cookieHeader.trim(); + if (!normalized) { + throw new JdLiveError("cookieHeader is required for JD live requests."); + } + + return normalized; +} + +function extractTemplateSkuId(templateUrl: string | undefined): string | undefined { + if (!templateUrl) { + return undefined; + } + + const url = new URL(templateUrl); + const body = readQueryBody(url); + return firstString(body?.skuId, body?.sku) ?? undefined; +} + +function buildTemplateSummary(templateUrl: string | undefined): JdTemplateSummary { + const skuId = extractTemplateSkuId(templateUrl); + + return { + available: Boolean(templateUrl), + ...(skuId ? { skuId } : {}) + }; +} + +function templateMatchesQuery( + templateUrl: string | undefined, + query: string +): boolean { + if (!templateUrl) { + return false; + } + + const templateKeyword = new URL(templateUrl).searchParams.get("keyword"); + return Boolean(templateKeyword && templateKeyword === query); +} + +async function fetchTextOrThrow( + url: string, + init: RequestInit, + sessionExpiredMessage: string +): Promise<{ finalUrl: string; text: string }> { + let response: Response; + + try { + response = await fetch(url, { + ...init, + redirect: "follow" + }); + } catch (error) { + throw new JdLiveError( + `JD live request failed before receiving a response: ${ + error instanceof Error ? error.message : "unknown error" + }`, + 502 + ); + } + + const text = await response.text(); + if (response.url.includes("passport.jd.com") || text.includes("passport.jd.com")) { + throw new JdLiveError(sessionExpiredMessage, 409); + } + + if (!response.ok) { + throw new JdLiveError( + `JD live request failed with status ${response.status}.`, + 502 + ); + } + + return { + finalUrl: response.url, + text + }; +} + +export function isJdLiveError(error: unknown): error is Error & { statusCode: number } { + return error instanceof Error && "statusCode" in error; +} + +export class JdLiveSessionService implements JdLiveService { + private session: StoredJdLiveSession | null = readEnvSession(); + + getSessionSummary(): JdLiveSessionSummary { + return { + configured: Boolean(this.session), + hasCookie: Boolean(this.session?.cookieHeader), + ...(this.session?.importedAt ? { importedAt: this.session.importedAt } : {}), + ...(this.session?.userAgent ? { userAgent: this.session.userAgent } : {}), + searchApiTemplate: buildTemplateSummary(this.session?.searchApiTemplateUrl), + detailTemplate: buildTemplateSummary(this.session?.detailTemplateUrl), + reviewsTemplate: buildTemplateSummary(this.session?.reviewsTemplateUrl) + }; + } + + importSession(input: JdLiveSessionInput): JdLiveSessionSummary { + const searchApiTemplateUrl = input.searchApiTemplateUrl?.trim(); + const detailTemplateUrl = input.detailTemplateUrl?.trim(); + const reviewsTemplateUrl = input.reviewsTemplateUrl?.trim(); + const searchReferer = input.searchReferer?.trim(); + const detailReferer = input.detailReferer?.trim(); + + this.session = { + cookieHeader: requireNonEmptyCookie(input.cookieHeader), + importedAt: nowIso(), + userAgent: input.userAgent?.trim() || DEFAULT_JD_USER_AGENT, + ...(searchApiTemplateUrl ? { searchApiTemplateUrl } : {}), + ...(detailTemplateUrl ? { detailTemplateUrl } : {}), + ...(reviewsTemplateUrl ? { reviewsTemplateUrl } : {}), + ...(searchReferer ? { searchReferer } : {}), + ...(detailReferer ? { detailReferer } : {}) + }; + + return this.getSessionSummary(); + } + + clearSession(): void { + this.session = readEnvSession(); + } + + async previewSearch( + query: string, + mode?: JdSearchMode + ): Promise { + const session = this.requireSession(); + const normalizedQuery = query.trim(); + if (!normalizedQuery) { + throw new JdLiveError("query is required for JD live search preview."); + } + + const resolvedMode = + mode ?? + (templateMatchesQuery(session.searchApiTemplateUrl, normalizedQuery) ? "api" : "html"); + + if (resolvedMode === "api") { + if (!session.searchApiTemplateUrl) { + throw new JdLiveError( + "JD search API template is missing. Import a fresh search request URL or use mode=html." + ); + } + + const templateUrl = new URL(session.searchApiTemplateUrl); + const templateKeyword = templateUrl.searchParams.get("keyword"); + if (templateKeyword && templateKeyword !== normalizedQuery) { + throw new JdLiveError( + `Imported search API template is locked to query "${templateKeyword}". ` + + "Capture a fresh request for the target query or use mode=html." + ); + } + + const response = await fetchTextOrThrow( + session.searchApiTemplateUrl, + { + headers: { + Accept: "application/json, text/plain, */*", + Cookie: session.cookieHeader, + Referer: + session.searchReferer ?? + `https://search.jd.com/Search?keyword=${encodeURIComponent(normalizedQuery)}`, + "User-Agent": session.userAgent + } + }, + "JD search session appears invalid. Re-login in the browser and re-import the cookie/header." + ); + + const candidates = parseJdSearchApiResponse(normalizedQuery, { text: response.text }); + return { + query: normalizedQuery, + source: "api", + candidateCount: candidates.length, + candidates + }; + } + + const searchUrl = `https://search.jd.com/Search?keyword=${encodeURIComponent(normalizedQuery)}`; + const response = await fetchTextOrThrow( + searchUrl, + { + headers: { + Accept: + "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", + "Accept-Language": "zh-CN,zh;q=0.9", + Cookie: session.cookieHeader, + Referer: session.searchReferer ?? "https://www.jd.com/", + "User-Agent": session.userAgent + } + }, + "JD search session appears invalid. Re-login in the browser and re-import the cookie/header." + ); + + const candidates = parseJdSearchHtml(normalizedQuery, response.text); + return { + query: normalizedQuery, + source: "html", + candidateCount: candidates.length, + candidates + }; + } + + async previewDetail(skuId: string): Promise { + const session = this.requireSession(); + const normalizedSkuId = skuId.trim(); + if (!normalizedSkuId) { + throw new JdLiveError("skuId is required for JD detail preview."); + } + + if (!session.detailTemplateUrl) { + throw new JdLiveError( + "JD detail template is missing. Capture a fresh pc_detailpage_wareBusiness request and import it first." + ); + } + + const templateSkuId = extractTemplateSkuId(session.detailTemplateUrl); + if (templateSkuId && templateSkuId !== normalizedSkuId) { + throw new JdLiveError( + `Imported detail template is bound to sku ${templateSkuId}. Open the matching JD item page and capture a fresh request for sku ${normalizedSkuId}.` + ); + } + + const response = await fetchTextOrThrow( + session.detailTemplateUrl, + { + headers: { + Accept: "application/json, text/plain, */*", + Cookie: session.cookieHeader, + Referer: session.detailReferer ?? `https://item.jd.com/${normalizedSkuId}.html`, + "User-Agent": session.userAgent + } + }, + "JD detail session appears invalid. Re-login in the browser and re-import the cookie/header." + ); + + return { + skuId: normalizedSkuId, + source: "api", + detail: parseJdDetailApiResponse(normalizedSkuId, { text: response.text }) + }; + } + + async previewReviews( + skuId: string, + commentCount = 5 + ): Promise { + const session = this.requireSession(); + const normalizedSkuId = skuId.trim(); + if (!normalizedSkuId) { + throw new JdLiveError("skuId is required for JD reviews preview."); + } + + if (!session.reviewsTemplateUrl) { + throw new JdLiveError( + "JD reviews template is missing. Capture a fresh getLegoWareDetailComment request and import it first." + ); + } + + const templateSkuId = extractTemplateSkuId(session.reviewsTemplateUrl); + if (templateSkuId && templateSkuId !== normalizedSkuId) { + throw new JdLiveError( + `Imported reviews template is bound to sku ${templateSkuId}. Open the matching JD item page and capture a fresh request for sku ${normalizedSkuId}.` + ); + } + + const templateUrl = new URL(session.reviewsTemplateUrl); + const requestUrl = withUpdatedQueryBody(templateUrl, (body) => ({ + ...body, + commentNum: commentCount + })); + + const response = await fetchTextOrThrow( + requestUrl, + { + headers: { + Accept: "application/json, text/plain, */*", + Cookie: session.cookieHeader, + Referer: session.detailReferer ?? `https://item.jd.com/${normalizedSkuId}.html`, + "User-Agent": session.userAgent + } + }, + "JD reviews session appears invalid. Re-login in the browser and re-import the cookie/header." + ); + + return { + skuId: normalizedSkuId, + source: "api", + reviews: parseJdReviewsApiResponse(normalizedSkuId, { text: response.text }) + }; + } + + private requireSession(): StoredJdLiveSession { + if (!this.session?.cookieHeader) { + throw new JdLiveError( + "JD live session is not configured. Import a browser cookie/header first." + ); + } + + return this.session; + } +} diff --git a/apps/api/src/platforms/jd/parsers.test.ts b/apps/api/src/platforms/jd/parsers.test.ts new file mode 100644 index 0000000..25d8543 --- /dev/null +++ b/apps/api/src/platforms/jd/parsers.test.ts @@ -0,0 +1,151 @@ +import { readFileSync } from "node:fs"; +import { describe, expect, it } from "vitest"; + +import { + parseJdDetailApiResponse, + parseJdReviewsApiResponse, + parseJdSearchApiResponse, + parseJdSearchHtml +} from "./parsers"; + +function readFixture(path: string): unknown { + return JSON.parse(readFileSync(new URL(path, import.meta.url), "utf8")) as unknown; +} + +describe("JD parsers", () => { + it("parses real JD search API fixtures into candidate records", () => { + const fixture = readFixture("../../../../../jd-search-json-shape.json") as { + firstWare: Record; + }; + + const candidates = parseJdSearchApiResponse("iPhone 15", { + data: { + wareList: [fixture.firstWare] + } + }); + + expect(candidates).toHaveLength(1); + expect(candidates[0]).toMatchObject({ + platform: "jd", + title: expect.stringContaining("iPhone"), + priceLabel: expect.stringMatching(/^¥/), + storeName: expect.any(String) + }); + expect(candidates[0]?.title).not.toContain(" { + const fixture = readFixture("../../../../../jd-search-card-blocks.json") as Array<{ + sku: string; + html: string; + }>; + const html = fixture.map((item) => item.html).join(""); + + const candidates = parseJdSearchHtml("iPhone 15", html); + + expect(candidates).toHaveLength(3); + expect(candidates).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + candidateId: "jd-100068388533", + storeName: "Apple产品京东自营旗舰店", + productUrl: "https://item.jd.com/100068388533.html", + salesHint: expect.stringContaining("已售500万+") + }) + ]) + ); + }); + + it("parses JD detail payloads from raw API objects", () => { + const detail = parseJdDetailApiResponse("100068388533", { + wareInfo: { + wareInfoMap: { + sku_status: 1 + } + }, + skuHeadVO: { + skuTitle: "Apple/苹果 iPhone 15 (A3092) 128GB 绿色 支持移动联通电信5G 双卡双待手机" + }, + price: { + p: "4398.00", + op: "4599.00" + }, + itemShopInfo: { + shopName: "Apple产品京东自营旗舰店" + }, + crumbInfoVO: { + crumbs: [ + { text: "手机通讯" }, + { text: "手机" }, + { text: "Apple" } + ] + }, + stockInfo: { + stockStateDesc: "有货,仅剩318件" + }, + mainImageVO: { + mainImageArea: { + imageUrl: "jfs/t1/example.jpg" + } + } + }); + + expect(detail).toMatchObject({ + skuId: "100068388533", + title: "Apple/苹果 iPhone 15 (A3092) 128GB 绿色 支持移动联通电信5G 双卡双待手机", + price: "4398.00", + originalPrice: "4599.00", + shopName: "Apple产品京东自营旗舰店", + categoryPath: ["手机通讯", "手机", "Apple"], + stockState: "有货,仅剩318件", + mainImage: "https://img14.360buyimg.com/n2/jfs/t1/example.jpg" + }); + }); + + it("parses JD reviews payloads from raw API objects", () => { + const reviews = parseJdReviewsApiResponse("100068388533", { + allCnt: "10000", + goodRate: "95%", + pictureCnt: "500", + tagStatisticsinfoList: [ + { + tagId: "tag-1", + name: "拍照效果超清晰", + count: "9313" + }, + { + tagId: "tag-2", + name: "手感很舒服", + count: "8628" + } + ], + commentInfoList: [ + { + commentId: "103893190162198263", + commentData: "蓝色 iPhone 15 颜值很高。", + commentScore: 5, + commentDate: "2026-04-02 19:23:16", + userLevelName: "PLUS会员" + } + ] + }); + + expect(reviews).toMatchObject({ + skuId: "100068388533", + total: "10000", + goodRate: "95%", + pictureCount: "500" + }); + expect(reviews.tags[0]).toMatchObject({ + tagId: "tag-1", + name: "拍照效果超清晰", + count: "9313" + }); + expect(reviews.comments[0]).toMatchObject({ + id: "103893190162198263", + content: "蓝色 iPhone 15 颜值很高。", + score: "5", + userLevelName: "PLUS会员" + }); + }); +}); diff --git a/apps/api/src/platforms/jd/parsers.ts b/apps/api/src/platforms/jd/parsers.ts new file mode 100644 index 0000000..32d4604 --- /dev/null +++ b/apps/api/src/platforms/jd/parsers.ts @@ -0,0 +1,361 @@ +import type { CandidateRecord } from "@cross-ai/domain"; + +import type { + JdProductDetailSnapshot, + JdProductReviewsSnapshot, + JdReviewCommentSnapshot, + JdReviewTagSnapshot +} from "./types"; +import { + absolutizeUrl, + asArray, + asRecord, + firstString, + normalizeWhitespace, + stringFrom, + uniqueStrings +} from "./utils"; + +function unwrapCapturedPayload(input: unknown): unknown { + const record = asRecord(input); + const text = stringFrom(record?.text); + if (!text) { + return input; + } + + try { + return JSON.parse(text) as unknown; + } catch { + return input; + } +} + +function extractSpecLabel(title: string): string { + const storageMatch = title.match(/\b\d+(?:GB|TB)\b/i); + if (storageMatch) { + return storageMatch[0].toUpperCase(); + } + + const colorMatch = title.match(/(黑色|白色|蓝色|粉色|绿色|黄色|紫色|原色|钛金属)/); + if (colorMatch) { + return colorMatch[0]; + } + + return "标准版"; +} + +function normalizePriceText(value: string | null): { value: number; label: string } | null { + if (!value) { + return null; + } + + const stripped = value.replace(/[^\d.]/g, ""); + if (!stripped) { + return null; + } + + const parsed = Number.parseFloat(stripped); + if (Number.isNaN(parsed)) { + return null; + } + + return { + value: parsed, + label: `¥${parsed.toString()}` + }; +} + +function normalizeInlineText(value: string | null): string | null { + if (!value) { + return null; + } + + const normalized = normalizeWhitespace(value).replace(/\s+([,。;:、])/g, "$1"); + return normalized || null; +} + +function matchFirst(value: string, pattern: RegExp): string | null { + const match = pattern.exec(value); + return match?.[1] ? normalizeWhitespace(match[1]) : null; +} + +function extractSearchCardBlocks(html: string): string[] { + const matches = html.matchAll( + /]*plugin_goodsCardWrapper[^>]*data-sku="[^"]+"[\s\S]*?(?=]*plugin_goodsCardWrapper[^>]*data-sku="|$)/g + ); + + return Array.from(matches, (match) => match[0]); +} + +function parseSearchCardBlock(block: string): CandidateRecord | null { + const sku = matchFirst(block, /data-sku="([^"]+)"/); + const title = + matchFirst( + block, + /_goods_title_container_[^"]*"[\s\S]*?]*title="([^"]+)"/ + ) ?? matchFirst(block, /title="([^"]+)"/); + const priceText = matchFirst(block, /]*>([\s\S]*?)<\/span>/); + const price = normalizePriceText(priceText); + const storeName = matchFirst( + block, + /([\s\S]*?)<\/span>/ + ); + const imageUrl = absolutizeUrl( + matchFirst(block, /]+src="([^"]+)"[^>]*alt="">/) ?? + matchFirst(block, /]+data-src="([^"]+)"/) + ); + const soldHint = matchFirst(block, /title="(已售[^"]+)"/); + const trendHint = matchFirst(block, /title="(30天[^"]+)"/); + const featureMatches = Array.from( + block.matchAll(/([^<]+)<\/span>/g), + (match) => normalizeWhitespace(match[1] ?? "") + ).filter(Boolean); + const highlights = uniqueStrings(featureMatches).slice(0, 4); + + if (!sku || !title || !price || !storeName) { + return null; + } + + return { + candidateId: `jd-${sku}`, + platform: "jd", + title, + price: price.value, + priceLabel: price.label, + storeName, + productUrl: `https://item.jd.com/${sku}.html`, + imageUrl: imageUrl ?? "https://placehold.co/640x480?text=JD", + salesHint: uniqueStrings([soldHint, trendHint]).join(" · ") || "暂无销量信息", + specLabel: extractSpecLabel(title), + highlights: highlights.length > 0 ? highlights : ["京东商品卡片已命中"] + }; +} + +export function parseJdSearchHtml(query: string, html: string): CandidateRecord[] { + const blocks = extractSearchCardBlocks(html); + const seen = new Set(); + const candidates: CandidateRecord[] = []; + + for (const block of blocks) { + const candidate = parseSearchCardBlock(block); + if (!candidate || seen.has(candidate.candidateId)) { + continue; + } + + seen.add(candidate.candidateId); + candidates.push(candidate); + } + + if (candidates.length > 0) { + return candidates; + } + + if (html.includes("暂无") || html.includes("很抱歉没有找到")) { + return []; + } + + return [ + { + candidateId: `jd-fallback-${encodeURIComponent(query)}`, + platform: "jd", + title: query, + price: 0, + priceLabel: "¥0", + storeName: "京东", + productUrl: `https://search.jd.com/Search?keyword=${encodeURIComponent(query)}`, + imageUrl: "https://placehold.co/640x480?text=JD", + salesHint: "页面已返回,但未解析出稳定商品卡片", + specLabel: "待确认", + highlights: ["需要刷新搜索模板或调整解析器"] + } + ]; +} + +export function parseJdSearchApiResponse(query: string, input: unknown): CandidateRecord[] { + const payload = asRecord(unwrapCapturedPayload(input)); + const data = asRecord(payload?.data); + const wareList = asArray(data?.wareList); + const seen = new Set(); + const candidates: CandidateRecord[] = []; + + for (const item of wareList) { + const ware = asRecord(item); + const sku = stringFrom(ware?.skuId); + const title = normalizeInlineText(firstString(ware?.wareName, ware?.wname)); + const priceText = firstString( + ware?.jdPrice, + asRecord(ware?.finalPrice)?.estimatedPrice, + ware?.price + ); + const price = normalizePriceText(priceText); + + if (!sku || !title || !price || seen.has(sku)) { + continue; + } + + const storeName = + normalizeInlineText(firstString(ware?.shopName, ware?.storeName, "京东店铺")) ?? + "京东店铺"; + const totalSales = stringFrom(ware?.totalSales); + const commentFuzzy = firstString(ware?.commentFuzzy, ware?.comment); + const highlights = uniqueStrings([ + stringFrom(ware?.selfSupport) === "1" ? "京东自营" : null, + stringFrom(ware?.good), + stringFrom(ware?.averageScore) ? `评分 ${stringFrom(ware?.averageScore)}` : null + ]); + + seen.add(sku); + candidates.push({ + candidateId: `jd-${sku}`, + platform: "jd", + title, + price: price.value, + priceLabel: price.label, + storeName, + productUrl: `https://item.jd.com/${sku}.html`, + imageUrl: + absolutizeUrl(stringFrom(ware?.imageurl)) ?? + "https://placehold.co/640x480?text=JD", + salesHint: + uniqueStrings([ + totalSales ? `已售${totalSales}` : null, + commentFuzzy ? `累计评价 ${commentFuzzy}` : null + ]).join(" · ") || "暂无销量信息", + specLabel: extractSpecLabel(title), + highlights: highlights.length > 0 ? highlights : ["京东 API 返回候选"] + }); + } + + return candidates; +} + +export function parseJdDetailApiResponse( + skuId: string, + input: unknown +): JdProductDetailSnapshot { + const payload = asRecord(unwrapCapturedPayload(input)); + const wareInfo = asRecord(payload?.wareInfo); + const wareInfoMap = asRecord(wareInfo?.wareInfoMap); + const price = asRecord(payload?.price); + const finalPrice = asRecord(price?.finalPrice); + const itemShopInfo = asRecord(payload?.itemShopInfo); + const crumbInfo = asRecord(payload?.crumbInfoVO); + const stockInfo = asRecord(payload?.stockInfo) ?? asRecord(payload?.stockVO); + const mainImage = asRecord(payload?.mainImageVO); + const mainImageArea = asRecord(mainImage?.mainImageArea); + const skuHead = asRecord(payload?.skuHeadVO); + const categoryPath = asArray(crumbInfo?.crumbs) + .map((item) => { + const crumb = asRecord(item); + return firstString(crumb?.text, crumb?.name); + }) + .filter((item): item is string => Boolean(item)); + + return { + skuId: firstString(wareInfo?.skuId, wareInfoMap?.skuId, skuId) ?? skuId, + title: normalizeInlineText( + firstString( + skuHead?.skuTitle, + wareInfo?.wname, + wareInfo?.name, + wareInfoMap?.wname, + wareInfoMap?.name, + skuHead?.seoTitle + ) + ), + price: firstString(price?.p, price?.price), + originalPrice: firstString(price?.op, price?.originalPrice), + estimatedPrice: firstString(finalPrice?.estimatedPrice, finalPrice?.price), + shopName: normalizeInlineText( + firstString(itemShopInfo?.shopName, itemShopInfo?.venderName) + ), + vendorId: firstString(itemShopInfo?.venderId, itemShopInfo?.shopId), + categoryPath, + stockState: normalizeInlineText( + firstString( + stockInfo?.stockStateDesc, + stockInfo?.stockDesc, + stockInfo?.stockStateName, + stockInfo?.stockState + ) + ), + mainImage: absolutizeUrl( + firstString( + mainImageArea?.imageUrl, + mainImage?.mainImageUrl, + mainImage?.mainImage, + asRecord(asArray(mainImage?.carouselArea)[0])?.imageUrl, + wareInfo?.imageUrl, + wareInfoMap?.imageUrl + ) + ), + averageScore: firstString( + wareInfo?.averageScore, + wareInfo?.score, + wareInfoMap?.averageScore, + wareInfoMap?.score + ) + }; +} + +function parseReviewTag(input: unknown): JdReviewTagSnapshot | null { + const tag = asRecord(input); + const name = firstString(tag?.name, tag?.tagName); + if (!name) { + return null; + } + + return { + tagId: firstString(tag?.tagId, tag?.id), + name, + count: firstString(tag?.count, tag?.num) + }; +} + +function parseReviewComment(input: unknown): JdReviewCommentSnapshot | null { + const comment = asRecord(input); + const content = normalizeInlineText( + firstString(comment?.content, comment?.commentData, comment?.tagCommentContent) + ); + const id = firstString(comment?.id, comment?.commentId); + + if (!content || !id) { + return null; + } + + return { + id, + content, + score: firstString(comment?.score, comment?.commentScore), + creationTime: firstString( + comment?.creationTime, + comment?.creationDate, + comment?.commentDate + ), + userLevelName: normalizeInlineText( + firstString(comment?.userLevelName, comment?.userClientShow) + ) + }; +} + +export function parseJdReviewsApiResponse( + skuId: string, + input: unknown +): JdProductReviewsSnapshot { + const payload = asRecord(unwrapCapturedPayload(input)); + const tags = asArray(payload?.tagStatisticsinfoList) + .map((tag) => parseReviewTag(tag)) + .filter((tag): tag is JdReviewTagSnapshot => Boolean(tag)); + const comments = asArray(payload?.commentInfoList) + .map((comment) => parseReviewComment(comment)) + .filter((comment): comment is JdReviewCommentSnapshot => Boolean(comment)); + + return { + skuId, + total: firstString(payload?.allCnt, payload?.allCntStr, payload?.goodCnt), + goodRate: firstString(payload?.goodRate, payload?.goodRateShow), + pictureCount: firstString(payload?.pictureCnt, payload?.showPicCnt), + tags, + comments + }; +} diff --git a/apps/api/src/platforms/jd/types.ts b/apps/api/src/platforms/jd/types.ts new file mode 100644 index 0000000..aa77fa4 --- /dev/null +++ b/apps/api/src/platforms/jd/types.ts @@ -0,0 +1,93 @@ +import type { CandidateRecord } from "@cross-ai/domain"; + +export type JdSearchMode = "html" | "api"; + +export interface JdTemplateSummary { + available: boolean; + skuId?: string | undefined; +} + +export interface JdLiveSessionInput { + cookieHeader: string; + userAgent?: string | undefined; + searchApiTemplateUrl?: string | undefined; + detailTemplateUrl?: string | undefined; + reviewsTemplateUrl?: string | undefined; + searchReferer?: string | undefined; + detailReferer?: string | undefined; +} + +export interface JdLiveSessionSummary { + configured: boolean; + importedAt?: string | undefined; + hasCookie: boolean; + userAgent?: string | undefined; + searchApiTemplate: JdTemplateSummary; + detailTemplate: JdTemplateSummary; + reviewsTemplate: JdTemplateSummary; +} + +export interface JdSearchPreviewResult { + query: string; + source: JdSearchMode; + candidateCount: number; + candidates: CandidateRecord[]; +} + +export interface JdProductDetailSnapshot { + skuId: string; + title: string | null; + price: string | null; + originalPrice: string | null; + estimatedPrice: string | null; + shopName: string | null; + vendorId: string | null; + categoryPath: string[]; + stockState: string | null; + mainImage: string | null; + averageScore: string | null; +} + +export interface JdReviewTagSnapshot { + tagId: string | null; + name: string; + count: string | null; +} + +export interface JdReviewCommentSnapshot { + id: string; + content: string; + score: string | null; + creationTime: string | null; + userLevelName: string | null; +} + +export interface JdProductReviewsSnapshot { + skuId: string; + total: string | null; + goodRate: string | null; + pictureCount: string | null; + tags: JdReviewTagSnapshot[]; + comments: JdReviewCommentSnapshot[]; +} + +export interface JdDetailPreviewResult { + skuId: string; + source: "api"; + detail: JdProductDetailSnapshot; +} + +export interface JdReviewsPreviewResult { + skuId: string; + source: "api"; + reviews: JdProductReviewsSnapshot; +} + +export interface JdLiveService { + getSessionSummary(): JdLiveSessionSummary; + importSession(input: JdLiveSessionInput): JdLiveSessionSummary; + clearSession(): void; + previewSearch(query: string, mode?: JdSearchMode): Promise; + previewDetail(skuId: string): Promise; + previewReviews(skuId: string, commentCount?: number): Promise; +} diff --git a/apps/api/src/platforms/jd/utils.ts b/apps/api/src/platforms/jd/utils.ts new file mode 100644 index 0000000..dbaa242 --- /dev/null +++ b/apps/api/src/platforms/jd/utils.ts @@ -0,0 +1,115 @@ +export function asRecord(value: unknown): Record | null { + if (!value || typeof value !== "object" || Array.isArray(value)) { + return null; + } + + return value as Record; +} + +export function asArray(value: unknown): unknown[] { + return Array.isArray(value) ? value : []; +} + +export function stringFrom(value: unknown): string | null { + if (typeof value === "string") { + const trimmed = value.trim(); + return trimmed.length > 0 ? trimmed : null; + } + + if (typeof value === "number" || typeof value === "boolean") { + return String(value); + } + + return null; +} + +export function firstString(...values: unknown[]): string | null { + for (const value of values) { + const resolved = stringFrom(value); + if (resolved) { + return resolved; + } + } + + return null; +} + +export function absolutizeUrl(value: string | null | undefined): string | null { + if (!value) { + return null; + } + + if (value.startsWith("http://") || value.startsWith("https://")) { + return value; + } + + if (value.startsWith("//")) { + return `https:${value}`; + } + + if (value.startsWith("/")) { + return `https://www.jd.com${value}`; + } + + if (value.startsWith("jfs/") || value.startsWith("t1/") || value.startsWith("t202")) { + return `https://img14.360buyimg.com/n2/${value}`; + } + + return `https://${value}`; +} + +export function decodeHtmlEntities(value: string): string { + return value + .replace(/ /g, " ") + .replace(/&/g, "&") + .replace(/"/g, '"') + .replace(/'/g, "'") + .replace(/</g, "<") + .replace(/>/g, ">"); +} + +export function stripTags(value: string): string { + return value.replace(/<[^>]+>/g, " "); +} + +export function normalizeWhitespace(value: string): string { + return decodeHtmlEntities(stripTags(value)).replace(/\s+/g, " ").trim(); +} + +export function uniqueStrings(values: Array): string[] { + return Array.from( + new Set( + values + .map((value) => value?.trim()) + .filter((value): value is string => Boolean(value)) + ) + ); +} + +export function parseEmbeddedJson(value: string | null): Record | null { + if (!value) { + return null; + } + + try { + const parsed = JSON.parse(value) as unknown; + return asRecord(parsed); + } catch { + return null; + } +} + +export function readQueryBody(url: URL): Record | null { + return parseEmbeddedJson(url.searchParams.get("body")); +} + +export function withUpdatedQueryBody( + url: URL, + updater: (body: Record) => Record +): string { + const body = readQueryBody(url) ?? {}; + const next = updater(body); + const nextUrl = new URL(url.toString()); + nextUrl.searchParams.set("body", JSON.stringify(next)); + return nextUrl.toString(); +} diff --git a/apps/api/src/server.jd-live.test.ts b/apps/api/src/server.jd-live.test.ts new file mode 100644 index 0000000..90f07f1 --- /dev/null +++ b/apps/api/src/server.jd-live.test.ts @@ -0,0 +1,288 @@ +import { describe, expect, it } from "vitest"; + +import type { + JdDetailPreviewResult, + JdLiveService, + JdLiveSessionSummary, + JdReviewsPreviewResult, + JdSearchPreviewResult +} from "./platforms/jd/types"; +import { createServer } from "./server"; + +function createJdLiveServiceStub( + overrides: Partial = {} +): JdLiveService { + let summary: JdLiveSessionSummary = { + configured: false, + hasCookie: false, + searchApiTemplate: { available: false }, + detailTemplate: { available: false }, + reviewsTemplate: { available: false } + }; + + return { + getSessionSummary() { + return overrides.getSessionSummary?.() ?? summary; + }, + importSession(input) { + if (overrides.importSession) { + return overrides.importSession(input); + } + + summary = { + configured: true, + importedAt: "2026-04-02T12:00:00.000Z", + hasCookie: true, + userAgent: input.userAgent ?? "stub-user-agent", + searchApiTemplate: { available: Boolean(input.searchApiTemplateUrl) }, + detailTemplate: { available: Boolean(input.detailTemplateUrl) }, + reviewsTemplate: { available: Boolean(input.reviewsTemplateUrl) } + }; + return summary; + }, + clearSession() { + if (overrides.clearSession) { + overrides.clearSession(); + return; + } + + summary = { + configured: false, + hasCookie: false, + searchApiTemplate: { available: false }, + detailTemplate: { available: false }, + reviewsTemplate: { available: false } + }; + }, + async previewSearch(query, mode) { + if (overrides.previewSearch) { + return overrides.previewSearch(query, mode); + } + + const preview: JdSearchPreviewResult = { + query, + source: "api", + candidateCount: 1, + candidates: [ + { + candidateId: "jd-100068388533", + platform: "jd", + title: "Apple iPhone 15", + price: 3898, + priceLabel: "CNY 3898", + storeName: "JD Self Operated", + productUrl: "https://item.jd.com/100068388533.html", + imageUrl: "https://img14.360buyimg.com/n2/jfs/t1/example.jpg", + salesHint: "sold 500+", + specLabel: "128GB", + highlights: ["A16"] + } + ] + }; + + return preview; + }, + async previewDetail(skuId) { + if (overrides.previewDetail) { + return overrides.previewDetail(skuId); + } + + const preview: JdDetailPreviewResult = { + skuId, + source: "api", + detail: { + skuId, + title: "Apple iPhone 15", + price: "4398.00", + originalPrice: "4599.00", + estimatedPrice: "3898", + shopName: "JD Self Operated", + vendorId: null, + categoryPath: ["phones", "smartphones", "apple"], + stockState: "in stock", + mainImage: "https://img14.360buyimg.com/n2/jfs/t1/example.jpg", + averageScore: null + } + }; + + return preview; + }, + async previewReviews(skuId, commentCount) { + if (overrides.previewReviews) { + return overrides.previewReviews(skuId, commentCount); + } + + const preview: JdReviewsPreviewResult = { + skuId, + source: "api", + reviews: { + skuId, + total: "10000", + goodRate: "95%", + pictureCount: "500", + tags: [ + { + tagId: "tag-1", + name: "clear camera", + count: "9313" + } + ], + comments: [ + { + id: "comment-1", + content: "smooth system and sharp photos", + score: "5", + creationTime: "2026-04-02 19:23:16", + userLevelName: "PLUS" + } + ] + } + }; + + return preview; + } + }; +} + +describe("JD live server endpoints", () => { + it("imports and clears a JD live session through dedicated endpoints", async () => { + const jdLiveService = createJdLiveServiceStub(); + const app = createServer({ jdLiveService }); + await app.ready(); + + const importResponse = await app.inject({ + method: "POST", + url: "/api/platforms/jd/live-session", + payload: { + cookieHeader: "thor=masked; pin=masked;", + searchApiTemplateUrl: + "https://api.m.jd.com/?functionId=pc_search_searchWare&body=%7B%22keyword%22:%22iphone%2015%22%7D", + detailTemplateUrl: + "https://api.m.jd.com/?functionId=pc_detailpage_wareBusiness&body=%7B%22skuId%22:%22100068388533%22%7D", + reviewsTemplateUrl: + "https://api.m.jd.com/?functionId=getLegoWareDetailComment&body=%7B%22sku%22:100068388533%7D" + } + }); + + expect(importResponse.statusCode).toBe(200); + expect(importResponse.json().session).toMatchObject({ + configured: true, + hasCookie: true, + searchApiTemplate: { + available: true + }, + detailTemplate: { + available: true + }, + reviewsTemplate: { + available: true + } + }); + + const summaryResponse = await app.inject({ + method: "GET", + url: "/api/platforms/jd/live-session" + }); + expect(summaryResponse.statusCode).toBe(200); + expect(summaryResponse.json().session).toMatchObject({ + configured: true, + hasCookie: true + }); + + const readinessResponse = await app.inject({ + method: "GET", + url: "/api/platforms/readiness" + }); + expect( + readinessResponse + .json() + .platforms.find((platform: { platform: string }) => platform.platform === "jd") + ).toMatchObject({ + platform: "jd", + ready: true, + status: "ready" + }); + + const clearResponse = await app.inject({ + method: "DELETE", + url: "/api/platforms/jd/live-session" + }); + expect(clearResponse.statusCode).toBe(204); + + const clearedSummaryResponse = await app.inject({ + method: "GET", + url: "/api/platforms/jd/live-session" + }); + expect(clearedSummaryResponse.json().session).toMatchObject({ + configured: false, + hasCookie: false + }); + + await app.close(); + }); + + it("exposes JD live preview endpoints through the injected service", async () => { + const jdLiveService = createJdLiveServiceStub(); + const app = createServer({ jdLiveService }); + await app.ready(); + + const searchResponse = await app.inject({ + method: "GET", + url: "/api/platforms/jd/live-search-preview?query=iPhone%2015" + }); + expect(searchResponse.statusCode).toBe(200); + expect(searchResponse.json().preview).toMatchObject({ + query: "iPhone 15", + source: "api", + candidateCount: 1 + }); + + const detailResponse = await app.inject({ + method: "GET", + url: "/api/platforms/jd/live-detail-preview?skuId=100068388533" + }); + expect(detailResponse.statusCode).toBe(200); + expect(detailResponse.json().preview.detail).toMatchObject({ + skuId: "100068388533", + shopName: "JD Self Operated" + }); + + const reviewsResponse = await app.inject({ + method: "GET", + url: "/api/platforms/jd/live-reviews-preview?skuId=100068388533&commentCount=3" + }); + expect(reviewsResponse.statusCode).toBe(200); + expect(reviewsResponse.json().preview.reviews).toMatchObject({ + skuId: "100068388533", + goodRate: "95%" + }); + + await app.close(); + }); + + it("surfaces JD live preview failures with service-provided status codes", async () => { + const jdLiveService = createJdLiveServiceStub({ + async previewDetail() { + const error = new Error("Imported detail template is bound to another sku.") as Error & { + statusCode: number; + }; + error.statusCode = 409; + throw error; + } + }); + const app = createServer({ jdLiveService }); + await app.ready(); + + const response = await app.inject({ + method: "GET", + url: "/api/platforms/jd/live-detail-preview?skuId=100068388533" + }); + + expect(response.statusCode).toBe(409); + expect(response.json()).toMatchObject({ + message: "Imported detail template is bound to another sku." + }); + + await app.close(); + }); +}); diff --git a/apps/api/src/server.ts b/apps/api/src/server.ts index f3b06c3..a70ad24 100644 --- a/apps/api/src/server.ts +++ b/apps/api/src/server.ts @@ -6,11 +6,14 @@ import type { import cors from "@fastify/cors"; import Fastify from "fastify"; +import { JdLiveSessionService, isJdLiveError } from "./platforms/jd/live-session"; +import type { JdLiveService, JdSearchMode } from "./platforms/jd/types"; import { InMemoryTaskStore } from "./store"; -export function createServer() { +export function createServer(options: { jdLiveService?: JdLiveService } = {}) { const app = Fastify({ logger: false }); const store = new InMemoryTaskStore(); + const jdLiveService = options.jdLiveService ?? new JdLiveSessionService(); app.register(cors, { origin: true }); @@ -54,11 +57,49 @@ export function createServer() { }; }); + app.get("/api/platforms/jd/live-session", async () => ({ + session: jdLiveService.getSessionSummary() + })); + + app.post<{ + Body: { + cookieHeader: string; + userAgent?: string; + searchApiTemplateUrl?: string; + detailTemplateUrl?: string; + reviewsTemplateUrl?: string; + searchReferer?: string; + detailReferer?: string; + }; + }>("/api/platforms/jd/live-session", async (request, reply) => { + try { + const session = jdLiveService.importSession(request.body); + store.preparePlatform("jd"); + reply.code(200); + return { session }; + } catch (error) { + reply.code(isJdLiveError(error) ? error.statusCode : 400); + return { + message: error instanceof Error ? error.message : "Invalid JD live session payload." + }; + } + }); + + app.delete("/api/platforms/jd/live-session", async (_request, reply) => { + jdLiveService.clearSession(); + store.clearPlatformSession("jd"); + reply.code(204); + return null; + }); + app.delete<{ Params: { platform: PlatformId }; }>("/api/sessions/:platform", async (request, reply) => { try { store.clearPlatformSession(request.params.platform); + if (request.params.platform === "jd") { + jdLiveService.clearSession(); + } reply.code(204); return null; } catch { @@ -172,6 +213,72 @@ export function createServer() { return { report }; }); + app.get<{ + Querystring: { query?: string; mode?: JdSearchMode }; + }>("/api/platforms/jd/live-search-preview", async (request, reply) => { + try { + const query = request.query.query?.trim(); + if (!query) { + reply.code(400); + return { message: "query is required." }; + } + + const preview = await jdLiveService.previewSearch(query, request.query.mode); + return { preview }; + } catch (error) { + reply.code(isJdLiveError(error) ? error.statusCode : 502); + return { + message: + error instanceof Error ? error.message : "JD live search preview failed." + }; + } + }); + + app.get<{ + Querystring: { skuId?: string }; + }>("/api/platforms/jd/live-detail-preview", async (request, reply) => { + try { + const skuId = request.query.skuId?.trim(); + if (!skuId) { + reply.code(400); + return { message: "skuId is required." }; + } + + const preview = await jdLiveService.previewDetail(skuId); + return { preview }; + } catch (error) { + reply.code(isJdLiveError(error) ? error.statusCode : 502); + return { + message: + error instanceof Error ? error.message : "JD live detail preview failed." + }; + } + }); + + app.get<{ + Querystring: { skuId?: string; commentCount?: string }; + }>("/api/platforms/jd/live-reviews-preview", async (request, reply) => { + try { + const skuId = request.query.skuId?.trim(); + if (!skuId) { + reply.code(400); + return { message: "skuId is required." }; + } + + const commentCount = request.query.commentCount + ? Number.parseInt(request.query.commentCount, 10) + : undefined; + const preview = await jdLiveService.previewReviews(skuId, commentCount); + return { preview }; + } catch (error) { + reply.code(isJdLiveError(error) ? error.statusCode : 502); + return { + message: + error instanceof Error ? error.message : "JD live reviews preview failed." + }; + } + }); + app.get("/api/history", async () => ({ tasks: store.listHistory() })); diff --git a/docs/CrawlerFeasibility.md b/docs/CrawlerFeasibility.md index 26443f5..9d24c02 100644 --- a/docs/CrawlerFeasibility.md +++ b/docs/CrawlerFeasibility.md @@ -73,12 +73,15 @@ 4. 同时可观察到 `cactus.jd.com/request_algo`、`jra.jd.com/jsTk.do`、`sgm-w.jd.com/h5` 等风控/参数初始化请求。 5. `https://item.jd.com/robots.txt` 的公开信息非常有限,不能据此推断搜索或详情路径可稳定匿名抓取。 参考: +6. 2026-04-02 登录后复测中,`https://search.jd.com/Search?keyword=iPhone%2015` 返回的是 Vite 壳页 HTML,响应内不再稳定包含商品卡片;同 query 的 `pc_search_searchWare` API 回放可稳定返回 30 条候选。 +7. 同次复测中,`pc_detailpage_wareBusiness` 与 `getLegoWareDetailComment` 在授权会话下可稳定返回价格、店铺、库存、主图、标签与评论正文。 判断: -1. 京东 PC 端存在明确的接口层,不是只能靠浏览器 DOM 抓。 +1. 京东 PC 端存在明确的接口层,不是只能靠浏览器 DOM 抓;其中搜索页 HTML 已明显退化为前端壳页,不能再把稳定 DOM 解析当作默认主路径。 2. 但接口调用明显依赖会话与动态参数上下文,不能把它当作无状态公开接口。 -3. 京东的非浏览器路线是可行的,但必须建立在“先会话、后请求”的体系上。 +3. 当前更稳的默认路线应收敛为“授权会话下的搜索/详情/评论 API 回放”,浏览器负责登录、模板刷新与阻塞恢复。 +4. 因此京东的非浏览器路线是可行的,但必须建立在“先会话、后请求”的体系上。 ### 3.3 淘宝