From 7e4cd5439d4d52edf49bb33af93afbfdb40afdbb Mon Sep 17 00:00:00 2001 From: ml-scout Date: Tue, 9 Jun 2026 11:49:18 +0800 Subject: [PATCH 1/2] =?UTF-8?q?fix(xiaohongshu/user):=20=E7=99=BB=E5=BD=95?= =?UTF-8?q?=E5=A2=99=E6=8A=A5=20AUTH=5FREQUIRED=20+=20=E4=BF=AE=20hydratio?= =?UTF-8?q?n=20=E7=AB=9E=E6=80=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `xiaohongshu user` 在两种场景下都误报,下游无法区分: ## 场景 1:登录态失效被误报成 "Malformed user store" / EMPTY_RESULT 小红书 profile 页比 search 更吃登录态。会话失效 / 被风控降级时,访问 `/user/profile/` 会 302 重定向到 `/login`(`__INITIAL_STATE__.user.loggedIn=false`, 页面挂登录二维码)。老逻辑读到登录页的空 user store → 抛 "Malformed Xiaohongshu user snapshot: user store was not found",或等到 store 出现却 notes 全空 → EMPTY_RESULT。 下游(如 ml-scout)据此把**登录失效**误判成**解析失败 / 空号**:白等 rate-limit cooldown、误锁平台。实测 2026-06-09:风控把 profile 浏览态降级 → 整批 seed 创作者全 302 到 /login,被报成 Malformed,排查了很久才定位是登录墙。 修复:USER_SNAPSHOT_JS 增加 `loginWall` 检测(pathname 落在 /login,或 loggedIn===false), 命中即抛 `AuthRequiredError`(code AUTH_REQUIRED,exit NOPERM)。语义正确,下游一眼可辨, 提示用户重登 xiaohongshu.com。 ## 场景 2:hydration 竞态(间歇性 "user store was not found") `__INITIAL_STATE__.user` 由 SSR / client bootstrap 异步注入;`page.goto` 后**立刻** `page.evaluate` 会撞 hydration 窗口 → store/notes 尚未就绪。note.js(`page.wait({time:2+rand*3})`) 与 download.js(`page.wait({time:1+rand*2})`)早已用 goto 后等待规避,唯独 user.js 漏了 → 慢加载必现、快加载侥幸过的间歇性失败(2026-06-09 复现、2026-05-20 亦有记录)。 修复:新增 `readUserSnapshotHydrated`——先快读一次(已就绪零额外延迟,保住快加载路径), 未拿到笔记**且非登录墙**就 wait 后重试至多 maxRetries 次。笔记是 `[tab[], ...]` 形态、首屏 可能晚于 store 填充,故用 `countFlatNotes`(展平后真实条数)作为就绪判据,而非 store 在即停。 登录墙命中立即停(再等无用);真·空号走满重试后由 EmptyResultError 正确收尾。 ## 测试 - 新增 clis/xiaohongshu/user.test.js(14 例):countFlatNotes / isLoginWallSnapshot / readUserSnapshotHydrated(快路径不 wait、慢加载重试、登录墙即停、空号走满重试)/ command.func(登录墙→AUTH_REQUIRED、笔记晚到→重试成功、空号→EMPTY_RESULT) - 端到端:对真实登录失效会话,`opencli xiaohongshu user ` 现报 AUTH_REQUIRED(exit 77), 替代旧的误导性 Malformed/EMPTY - xiaohongshu + rednote 全套 231 例通过(USER_SNAPSHOT_JS 被 rednote 复用,loginWall 为 additive 字段、无回归) --- clis/xiaohongshu/user.js | 52 +++++++++++++- clis/xiaohongshu/user.test.js | 130 ++++++++++++++++++++++++++++++++++ 2 files changed, 180 insertions(+), 2 deletions(-) create mode 100644 clis/xiaohongshu/user.test.js diff --git a/clis/xiaohongshu/user.js b/clis/xiaohongshu/user.js index 2b384dd0e..0bd322122 100644 --- a/clis/xiaohongshu/user.js +++ b/clis/xiaohongshu/user.js @@ -1,5 +1,5 @@ import { cli, Strategy } from '@jackwener/opencli/registry'; -import { CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; +import { AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors'; import { extractXhsUserNotes, normalizeXhsUserId } from './user-helpers.js'; /** * Host-agnostic IIFE that snapshots the user profile's Pinia store. Exported @@ -19,12 +19,19 @@ export const USER_SNAPSHOT_JS = ` const hasUserStore = Boolean(userStore && typeof userStore === 'object'); const rawNotes = hasUserStore ? (userStore.notes?._value || userStore.notes) : undefined; const rawPageData = hasUserStore ? (userStore.userPageData?._value || userStore.userPageData) : undefined; + // 登录墙检测:小红书 profile 页比 search 更吃登录态,会话失效/被风控降级时访问 + // /user/profile/ 会 302 到 /login(loggedIn=false)。用 indexOf 而非正则—— + // 本块是嵌在模板字符串里的 JS,正则 \\b 会被模板解析成退格符。 + const loggedInVal = hasUserStore ? (userStore.loggedIn?._value ?? userStore.loggedIn) : undefined; + const pathName = (typeof location !== 'undefined' && location.pathname) ? location.pathname : ''; + const onLoginPage = pathName.indexOf('/login') === 0; return { noteGroups: safeClone(rawNotes || []), pageData: safeClone(rawPageData || {}), storePresent: hasUserStore, notesPresent: Array.isArray(rawNotes), pageDataPresent: Boolean(rawPageData && typeof rawPageData === 'object' && Object.keys(rawPageData).length > 0), + loginWall: Boolean(onLoginPage || loggedInVal === false), }; })() `; @@ -42,6 +49,40 @@ export function assertReadableUserSnapshot(snapshot) { throw new CommandExecutionError('Malformed Xiaohongshu user snapshot: notes array was not found'); } } +/** 展平 noteGroups 后的真实笔记条数。小红书 user store 的 notes 是 [tab[], tab[], ...] + * 形态(每个 tab 一个数组),首屏笔记在其中某个 tab 里;这里数所有 tab 里的笔记总数。 */ +export function countFlatNotes(snapshot) { + const groups = snapshot?.noteGroups; + if (!Array.isArray(groups)) + return 0; + let n = 0; + for (const g of groups) + n += Array.isArray(g) ? g.length : (g ? 1 : 0); + return n; +} +/** 页面是否被登录墙挡(302 到 /login,或 user store loggedIn=false)。 */ +export function isLoginWallSnapshot(snapshot) { + return Boolean(snapshot && typeof snapshot === 'object' && snapshot.loginWall === true); +} +/** + * 读取 user 快照,带 hydration 等待 + 重试。修两个真实坑: + * 1) 慢加载竞态:`__INITIAL_STATE__.user` 由 SSR/client bootstrap 异步注入,`page.goto` 后 + * 立刻 evaluate 会撞 hydration 窗口 → store/notes 尚未就绪。note.js / download.js 早用 + * `page.wait` 规避,唯独 user.js 漏了 → 间歇性 "user store was not found"(2026-06-09 整批 + * seed 全挂、2026-05-20 亦复现)。 + * 2) 笔记懒加载:`notes` 是 [tab[], ...] 形态,首屏笔记可能比 store 更晚填充。 + * 策略:先快读一次(页面已就绪则零额外延迟,保住快加载路径);未拿到笔记**且非登录墙**就 + * `page.wait` 后重试至多 maxRetries 次。命中登录墙立即停(再等无用,交给 caller 抛 AUTH_REQUIRED); + * 真·空号(销号/私密/全删)走满重试后返回空快照,由下游 EmptyResultError 正确收尾。导出供测试。 + */ +export async function readUserSnapshotHydrated(page, maxRetries = 8, waitSeconds = 2) { + let snapshot = await readUserSnapshot(page); + for (let i = 0; i < maxRetries && !isLoginWallSnapshot(snapshot) && countFlatNotes(snapshot) === 0; i += 1) { + await page.wait({ time: waitSeconds }); + snapshot = await readUserSnapshot(page); + } + return snapshot; +} export const command = cli({ site: 'xiaohongshu', name: 'user', @@ -60,7 +101,14 @@ export const command = cli({ const userId = normalizeXhsUserId(String(kwargs.id)); const limit = Math.max(1, Number(kwargs.limit ?? 15)); await page.goto(`https://www.xiaohongshu.com/user/profile/${userId}`); - let snapshot = await readUserSnapshot(page); + let snapshot = await readUserSnapshotHydrated(page); + if (isLoginWallSnapshot(snapshot)) { + // profile 页登录态失效 → 302 到 /login。绝不能误报成 "Malformed user store" / + // EMPTY_RESULT —— 那会让下游(ml-scout 等)把登录失效当解析失败 / 空号,白等 + // rate-limit cooldown(实测 2026-06-09:风控把 profile 浏览态降级 → 整批 seed + // 重定向到 /login)。抛 AUTH_REQUIRED,让 caller 提示用户重登 xiaohongshu.com。 + throw new AuthRequiredError('xiaohongshu.com', 'Xiaohongshu profile requires login (page redirected to /login or session expired); re-login to xiaohongshu.com and retry.'); + } assertReadableUserSnapshot(snapshot); let results = extractXhsUserNotes(snapshot ?? {}, userId); let previousCount = results.length; diff --git a/clis/xiaohongshu/user.test.js b/clis/xiaohongshu/user.test.js new file mode 100644 index 000000000..c6cd0d4f9 --- /dev/null +++ b/clis/xiaohongshu/user.test.js @@ -0,0 +1,130 @@ +import { describe, expect, it, vi } from 'vitest'; +import { getRegistry } from '@jackwener/opencli/registry'; +import './user.js'; +import { countFlatNotes, isLoginWallSnapshot, readUserSnapshotHydrated, assertReadableUserSnapshot, } from './user.js'; + +// 构造各类 user 快照(与 USER_SNAPSHOT_JS 返回形状一致)。 +function snap(overrides = {}) { + return { + noteGroups: [], + pageData: {}, + storePresent: true, + notesPresent: true, + pageDataPresent: true, + loginWall: false, + ...overrides, + }; +} +// 一条可被 extractXhsUserNotes 解析的笔记,包在 tab 分组里:notes = [tab[], ...]。 +function noteEntry(id) { + return { noteCard: { noteId: id, displayTitle: 't-' + id, type: 'normal', interactInfo: { likedCount: 3 } } }; +} +const NOTES_SNAP = snap({ noteGroups: [[noteEntry('aaa')], [], []] }); +const EMPTY_GROUPS_SNAP = snap({ noteGroups: [[], [], [], [], []] }); // store 在但笔记未填充 +const LOGIN_WALL_SNAP = snap({ noteGroups: [[], [], [], [], []], loginWall: true }); +const NO_STORE_SNAP = snap({ noteGroups: [], storePresent: false, notesPresent: false }); + +function createPageMock(evaluateImpl) { + return { + goto: vi.fn().mockResolvedValue(undefined), + evaluate: evaluateImpl, + wait: vi.fn().mockResolvedValue(undefined), + autoScroll: vi.fn().mockResolvedValue(undefined), + }; +} + +describe('countFlatNotes', () => { + it('5 个空 tab 分组 → 0', () => { + expect(countFlatNotes(EMPTY_GROUPS_SNAP)).toBe(0); + }); + it('嵌套数组求和', () => { + expect(countFlatNotes(snap({ noteGroups: [[noteEntry('a'), noteEntry('b')], [noteEntry('c')]] }))).toBe(3); + }); + it('非数组 / 缺字段 → 0', () => { + expect(countFlatNotes({ noteGroups: null })).toBe(0); + expect(countFlatNotes(null)).toBe(0); + }); +}); + +describe('isLoginWallSnapshot', () => { + it('loginWall=true → true', () => { + expect(isLoginWallSnapshot(LOGIN_WALL_SNAP)).toBe(true); + }); + it('loginWall=false / 缺失 → false', () => { + expect(isLoginWallSnapshot(NOTES_SNAP)).toBe(false); + expect(isLoginWallSnapshot(snap({ loginWall: undefined }))).toBe(false); + expect(isLoginWallSnapshot(null)).toBe(false); + }); +}); + +describe('readUserSnapshotHydrated', () => { + it('快路径:首读即有笔记 → 不 wait、不重试', async () => { + const page = createPageMock(vi.fn().mockResolvedValue(NOTES_SNAP)); + const out = await readUserSnapshotHydrated(page); + expect(out).toBe(NOTES_SNAP); + expect(page.evaluate).toHaveBeenCalledTimes(1); + expect(page.wait).not.toHaveBeenCalled(); + }); + it('慢加载:store/notes 晚到 → 重试直到笔记出现(回归 2026-06-09 hydration 竞态)', async () => { + const page = createPageMock(vi + .fn() + .mockResolvedValueOnce(NO_STORE_SNAP) // goto 后立刻读:store 还没 hydrate + .mockResolvedValueOnce(EMPTY_GROUPS_SNAP) // store 在了但 notes 空 + .mockResolvedValue(NOTES_SNAP)); // 笔记终于填充 + const out = await readUserSnapshotHydrated(page); + expect(countFlatNotes(out)).toBe(1); + expect(page.wait).toHaveBeenCalled(); // 确实等过 + }); + it('登录墙:命中即停,不浪费重试预算', async () => { + const page = createPageMock(vi.fn().mockResolvedValue(LOGIN_WALL_SNAP)); + const out = await readUserSnapshotHydrated(page); + expect(isLoginWallSnapshot(out)).toBe(true); + expect(page.evaluate).toHaveBeenCalledTimes(1); // 没进重试循环 + expect(page.wait).not.toHaveBeenCalled(); + }); + it('真·空号:走满重试后返回空快照(交给下游 EmptyResultError)', async () => { + const page = createPageMock(vi.fn().mockResolvedValue(EMPTY_GROUPS_SNAP)); + const out = await readUserSnapshotHydrated(page, 3, 0.01); + expect(countFlatNotes(out)).toBe(0); + expect(isLoginWallSnapshot(out)).toBe(false); + expect(page.wait).toHaveBeenCalledTimes(3); // maxRetries 次 + }); +}); + +describe('xiaohongshu user command', () => { + const command = getRegistry().get('xiaohongshu/user'); + + it('登录墙 → 抛 AUTH_REQUIRED(不再误报 Malformed/EMPTY)', async () => { + const page = createPageMock(vi.fn().mockResolvedValue(LOGIN_WALL_SNAP)); + await expect(command.func(page, { id: '56d290df84edcd782a3c8748', limit: 5 })).rejects.toMatchObject({ + code: 'AUTH_REQUIRED', + }); + }); + + it('笔记晚到 → 重试后成功返回(端到端回归)', async () => { + const page = createPageMock(vi + .fn() + .mockResolvedValueOnce(NO_STORE_SNAP) + .mockResolvedValueOnce(EMPTY_GROUPS_SNAP) + .mockResolvedValue(NOTES_SNAP)); + const rows = await command.func(page, { id: 'someuser', limit: 5 }); + expect(rows).toHaveLength(1); + expect(rows[0].id).toBe('aaa'); + }); + + it('真·空号 → 抛 EMPTY_RESULT', async () => { + const page = createPageMock(vi.fn().mockResolvedValue(EMPTY_GROUPS_SNAP)); + await expect(command.func(page, { id: 'emptyuser', limit: 5 })).rejects.toMatchObject({ + code: 'EMPTY_RESULT', + }); + }); +}); + +describe('assertReadableUserSnapshot (既有契约保持)', () => { + it('storePresent=false → Malformed: user store was not found', () => { + expect(() => assertReadableUserSnapshot(NO_STORE_SNAP)).toThrow(/user store was not found/); + }); + it('正常快照不抛', () => { + expect(() => assertReadableUserSnapshot(NOTES_SNAP)).not.toThrow(); + }); +}); From 0cf2dc27073e547327bac252b6797ed3b60b0966 Mon Sep 17 00:00:00 2001 From: jackwener Date: Mon, 15 Jun 2026 18:00:37 +0800 Subject: [PATCH 2/2] fix(xiaohongshu): map user scroll login wall to auth error --- clis/xiaohongshu/user.js | 8 +++++++- clis/xiaohongshu/user.test.js | 11 +++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/clis/xiaohongshu/user.js b/clis/xiaohongshu/user.js index 0bd322122..ff9d54547 100644 --- a/clis/xiaohongshu/user.js +++ b/clis/xiaohongshu/user.js @@ -64,6 +64,9 @@ export function countFlatNotes(snapshot) { export function isLoginWallSnapshot(snapshot) { return Boolean(snapshot && typeof snapshot === 'object' && snapshot.loginWall === true); } +function throwLoginWallAuthRequired() { + throw new AuthRequiredError('xiaohongshu.com', 'Xiaohongshu profile requires login (page redirected to /login or session expired); re-login to xiaohongshu.com and retry.'); +} /** * 读取 user 快照,带 hydration 等待 + 重试。修两个真实坑: * 1) 慢加载竞态:`__INITIAL_STATE__.user` 由 SSR/client bootstrap 异步注入,`page.goto` 后 @@ -107,7 +110,7 @@ export const command = cli({ // EMPTY_RESULT —— 那会让下游(ml-scout 等)把登录失效当解析失败 / 空号,白等 // rate-limit cooldown(实测 2026-06-09:风控把 profile 浏览态降级 → 整批 seed // 重定向到 /login)。抛 AUTH_REQUIRED,让 caller 提示用户重登 xiaohongshu.com。 - throw new AuthRequiredError('xiaohongshu.com', 'Xiaohongshu profile requires login (page redirected to /login or session expired); re-login to xiaohongshu.com and retry.'); + throwLoginWallAuthRequired(); } assertReadableUserSnapshot(snapshot); let results = extractXhsUserNotes(snapshot ?? {}, userId); @@ -116,6 +119,9 @@ export const command = cli({ await page.autoScroll({ times: 1, delayMs: 1500 }); await page.wait(1); snapshot = await readUserSnapshot(page); + if (isLoginWallSnapshot(snapshot)) { + throwLoginWallAuthRequired(); + } assertReadableUserSnapshot(snapshot); const nextResults = extractXhsUserNotes(snapshot ?? {}, userId); if (nextResults.length <= previousCount) diff --git a/clis/xiaohongshu/user.test.js b/clis/xiaohongshu/user.test.js index c6cd0d4f9..91d6f6204 100644 --- a/clis/xiaohongshu/user.test.js +++ b/clis/xiaohongshu/user.test.js @@ -112,6 +112,17 @@ describe('xiaohongshu user command', () => { expect(rows[0].id).toBe('aaa'); }); + it('滚动续读时被重定向到登录墙 → 抛 AUTH_REQUIRED', async () => { + const page = createPageMock(vi + .fn() + .mockResolvedValueOnce(snap({ noteGroups: [[noteEntry('aaa')]] })) + .mockResolvedValueOnce(LOGIN_WALL_SNAP)); + await expect(command.func(page, { id: 'someuser', limit: 5 })).rejects.toMatchObject({ + code: 'AUTH_REQUIRED', + }); + expect(page.autoScroll).toHaveBeenCalledTimes(1); + }); + it('真·空号 → 抛 EMPTY_RESULT', async () => { const page = createPageMock(vi.fn().mockResolvedValue(EMPTY_GROUPS_SNAP)); await expect(command.func(page, { id: 'emptyuser', limit: 5 })).rejects.toMatchObject({