Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 56 additions & 2 deletions clis/xiaohongshu/user.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { cli, Strategy } from '@jackwener/opencli/registry';
import { CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
import { AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
import { extractXhsUserNotes, normalizeXhsUserId } from './user-helpers.js';
/**
* Host-agnostic IIFE that snapshots the user profile's Pinia store. Exported
Expand All @@ -19,12 +19,19 @@ export const USER_SNAPSHOT_JS = `
const hasUserStore = Boolean(userStore && typeof userStore === 'object');
const rawNotes = hasUserStore ? (userStore.notes?._value || userStore.notes) : undefined;
const rawPageData = hasUserStore ? (userStore.userPageData?._value || userStore.userPageData) : undefined;
// 登录墙检测:小红书 profile 页比 search 更吃登录态,会话失效/被风控降级时访问
// /user/profile/<id> 会 302 到 /login(loggedIn=false)。用 indexOf 而非正则——
// 本块是嵌在模板字符串里的 JS,正则 \\b 会被模板解析成退格符。
const loggedInVal = hasUserStore ? (userStore.loggedIn?._value ?? userStore.loggedIn) : undefined;
const pathName = (typeof location !== 'undefined' && location.pathname) ? location.pathname : '';
const onLoginPage = pathName.indexOf('/login') === 0;
return {
noteGroups: safeClone(rawNotes || []),
pageData: safeClone(rawPageData || {}),
storePresent: hasUserStore,
notesPresent: Array.isArray(rawNotes),
pageDataPresent: Boolean(rawPageData && typeof rawPageData === 'object' && Object.keys(rawPageData).length > 0),
loginWall: Boolean(onLoginPage || loggedInVal === false),
};
})()
`;
Expand All @@ -42,6 +49,43 @@ export function assertReadableUserSnapshot(snapshot) {
throw new CommandExecutionError('Malformed Xiaohongshu user snapshot: notes array was not found');
}
}
/**
 * Count the notes contained in a snapshot's `noteGroups`.
 *
 * Per the original author's note, the Xiaohongshu user store keeps notes as
 * one array per tab (`[tab[], tab[], ...]`), so the total is summed across
 * every group rather than read from a single one.
 *
 * @param {object|null|undefined} snapshot - Snapshot expected to carry `noteGroups`.
 * @returns {number} Sum of the lengths of all array groups; a truthy non-array
 *   group counts as one note; 0 when `noteGroups` is not an array.
 */
export function countFlatNotes(snapshot) {
  const tabs = snapshot?.noteGroups;
  if (!Array.isArray(tabs)) {
    return 0;
  }
  return tabs.reduce((total, tab) => {
    if (Array.isArray(tab)) {
      return total + tab.length;
    }
    // A non-array entry is treated as a single note when truthy.
    return tab ? total + 1 : total;
  }, 0);
}
/**
 * Tell whether a snapshot was flagged as a login wall.
 *
 * The snapshot script sets `loginWall` when the page path starts with
 * `/login` or the user store reports `loggedIn === false`; this helper only
 * inspects that flag.
 *
 * @param {*} snapshot - Value returned by the snapshot read (may be null/undefined).
 * @returns {boolean} `true` only for an object whose `loginWall` is strictly `true`.
 */
export function isLoginWallSnapshot(snapshot) {
  if (snapshot === null || typeof snapshot !== 'object') {
    return false;
  }
  return snapshot.loginWall === true;
}
/**
 * Raise the error used when the profile page is behind a login wall.
 *
 * @throws {AuthRequiredError} Always; never returns.
 */
function throwLoginWallAuthRequired() {
  const domain = 'xiaohongshu.com';
  const message = 'Xiaohongshu profile requires login (page redirected to /login or session expired); re-login to xiaohongshu.com and retry.';
  throw new AuthRequiredError(domain, message);
}
/**
 * Read the user snapshot, waiting for hydration and retrying when needed.
 *
 * Rationale recorded by the original author:
 *  1) Slow-load race: `__INITIAL_STATE__.user` is injected asynchronously by the
 *     SSR/client bootstrap, so evaluating right after `page.goto` can land in the
 *     hydration window before the store/notes are ready. This showed up as an
 *     intermittent "user store was not found" (seen 2026-06-09 and 2026-05-20).
 *  2) Lazy notes: `notes` has the `[tab[], ...]` shape and the first-screen notes
 *     may be filled in later than the store itself.
 *
 * Strategy: read once immediately (no extra delay when the page is already
 * ready). While no notes were found AND the snapshot is not a login wall, call
 * `page.wait` and read again, at most `maxRetries` times. A login wall stops the
 * loop at once (the caller is expected to raise AUTH_REQUIRED); a genuinely
 * empty profile exhausts the retries and the empty snapshot is returned for the
 * caller to handle. Exported for tests.
 *
 * @param {object} page - Page handle; this function calls `page.wait({ time })`
 *   and passes the page to `readUserSnapshot`.
 * @param {number} [maxRetries=8] - Maximum number of wait-and-reread rounds.
 * @param {number} [waitSeconds=2] - Value passed as `time` to `page.wait`.
 * @returns {Promise<*>} The last snapshot that was read.
 */
export async function readUserSnapshotHydrated(page, maxRetries = 8, waitSeconds = 2) {
  let current = await readUserSnapshot(page);
  let attempt = 0;
  while (attempt < maxRetries) {
    // Stop early: waiting cannot help behind a login wall, and notes are already there otherwise.
    if (isLoginWallSnapshot(current) || countFlatNotes(current) !== 0) {
      break;
    }
    await page.wait({ time: waitSeconds });
    current = await readUserSnapshot(page);
    attempt += 1;
  }
  return current;
}
export const command = cli({
site: 'xiaohongshu',
name: 'user',
Expand All @@ -60,14 +104,24 @@ export const command = cli({
const userId = normalizeXhsUserId(String(kwargs.id));
const limit = Math.max(1, Number(kwargs.limit ?? 15));
await page.goto(`https://www.xiaohongshu.com/user/profile/${userId}`);
let snapshot = await readUserSnapshot(page);
let snapshot = await readUserSnapshotHydrated(page);
if (isLoginWallSnapshot(snapshot)) {
// profile 页登录态失效 → 302 到 /login。绝不能误报成 "Malformed user store" /
// EMPTY_RESULT —— 那会让下游(ml-scout 等)把登录失效当解析失败 / 空号,白等
// rate-limit cooldown(实测 2026-06-09:风控把 profile 浏览态降级 → 整批 seed
// 重定向到 /login)。抛 AUTH_REQUIRED,让 caller 提示用户重登 xiaohongshu.com。
throwLoginWallAuthRequired();
}
assertReadableUserSnapshot(snapshot);
let results = extractXhsUserNotes(snapshot ?? {}, userId);
let previousCount = results.length;
for (let i = 0; results.length < limit && i < 4; i += 1) {
await page.autoScroll({ times: 1, delayMs: 1500 });
await page.wait(1);
snapshot = await readUserSnapshot(page);
if (isLoginWallSnapshot(snapshot)) {
throwLoginWallAuthRequired();
}
assertReadableUserSnapshot(snapshot);
const nextResults = extractXhsUserNotes(snapshot ?? {}, userId);
if (nextResults.length <= previousCount)
Expand Down
141 changes: 141 additions & 0 deletions clis/xiaohongshu/user.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import { describe, expect, it, vi } from 'vitest';
import { getRegistry } from '@jackwener/opencli/registry';
import './user.js';
import { countFlatNotes, isLoginWallSnapshot, readUserSnapshotHydrated, assertReadableUserSnapshot, } from './user.js';

// Build a user snapshot fixture with the same shape that USER_SNAPSHOT_JS returns.
// Any field can be replaced (or added) through `overrides`.
function snap(overrides = {}) {
  const defaults = {
    noteGroups: [],
    pageData: {},
    storePresent: true,
    notesPresent: true,
    pageDataPresent: true,
    loginWall: false,
  };
  return { ...defaults, ...overrides };
}
// One note entry intended to be parseable by extractXhsUserNotes. Callers wrap
// it in tab groups, since notes have the shape [tab[], ...].
function noteEntry(id) {
  const noteCard = {
    noteId: id,
    displayTitle: 't-' + id,
    type: 'normal',
    interactInfo: { likedCount: 3 },
  };
  return { noteCard };
}
// Shared snapshot fixtures: one with a single note, one with only empty tab
// groups, one flagged as a login wall, and one with the user store absent.
const NOTES_SNAP = snap({ noteGroups: [[noteEntry('aaa')], [], []] });
const EMPTY_GROUPS_SNAP = snap({ noteGroups: [[], [], [], [], []] }); // store present but notes not populated yet
const LOGIN_WALL_SNAP = snap({ noteGroups: [[], [], [], [], []], loginWall: true });
const NO_STORE_SNAP = snap({ noteGroups: [], storePresent: false, notesPresent: false });

// Build a minimal page double: `goto`, `wait` and `autoScroll` are vitest mocks
// resolving to undefined, while `evaluate` is supplied by the caller so each
// test can script the sequence of snapshots it returns.
function createPageMock(evaluateImpl) {
  const resolvedStub = () => vi.fn().mockResolvedValue(undefined);
  const page = {
    goto: resolvedStub(),
    evaluate: evaluateImpl,
    wait: resolvedStub(),
    autoScroll: resolvedStub(),
  };
  return page;
}

// Unit tests for countFlatNotes: empty tab groups, nested groups that must be
// summed, and non-array / missing input.
describe('countFlatNotes', () => {
// Five empty tab groups contain no notes.
it('5 个空 tab 分组 → 0', () => {
expect(countFlatNotes(EMPTY_GROUPS_SNAP)).toBe(0);
});
// Lengths of nested arrays are added together (2 + 1).
it('嵌套数组求和', () => {
expect(countFlatNotes(snap({ noteGroups: [[noteEntry('a'), noteEntry('b')], [noteEntry('c')]] }))).toBe(3);
});
// A non-array noteGroups value or a null snapshot yields 0.
it('非数组 / 缺字段 → 0', () => {
expect(countFlatNotes({ noteGroups: null })).toBe(0);
expect(countFlatNotes(null)).toBe(0);
});
});

// Unit tests for isLoginWallSnapshot: only a strict `loginWall === true` counts.
describe('isLoginWallSnapshot', () => {
it('loginWall=true → true', () => {
expect(isLoginWallSnapshot(LOGIN_WALL_SNAP)).toBe(true);
});
// false, undefined, and a null snapshot are all reported as "no login wall".
it('loginWall=false / 缺失 → false', () => {
expect(isLoginWallSnapshot(NOTES_SNAP)).toBe(false);
expect(isLoginWallSnapshot(snap({ loginWall: undefined }))).toBe(false);
expect(isLoginWallSnapshot(null)).toBe(false);
});
});

// Tests for readUserSnapshotHydrated: the fast path, late hydration, the
// login-wall short circuit, and a genuinely empty profile.
describe('readUserSnapshotHydrated', () => {
// Fast path: notes are present on the first read, so there is no wait and no retry.
it('快路径:首读即有笔记 → 不 wait、不重试', async () => {
const page = createPageMock(vi.fn().mockResolvedValue(NOTES_SNAP));
const out = await readUserSnapshotHydrated(page);
expect(out).toBe(NOTES_SNAP);
expect(page.evaluate).toHaveBeenCalledTimes(1);
expect(page.wait).not.toHaveBeenCalled();
});
// Slow load: store/notes arrive late, so reads are retried until notes show up.
it('慢加载:store/notes 晚到 → 重试直到笔记出现(回归 2026-06-09 hydration 竞态)', async () => {
const page = createPageMock(vi
.fn()
.mockResolvedValueOnce(NO_STORE_SNAP) // read right after goto: store not hydrated yet
.mockResolvedValueOnce(EMPTY_GROUPS_SNAP) // store is present but notes are empty
.mockResolvedValue(NOTES_SNAP)); // notes finally populated
const out = await readUserSnapshotHydrated(page);
expect(countFlatNotes(out)).toBe(1);
expect(page.wait).toHaveBeenCalled(); // it really did wait
});
// Login wall: stop immediately instead of spending the retry budget.
it('登录墙:命中即停,不浪费重试预算', async () => {
const page = createPageMock(vi.fn().mockResolvedValue(LOGIN_WALL_SNAP));
const out = await readUserSnapshotHydrated(page);
expect(isLoginWallSnapshot(out)).toBe(true);
expect(page.evaluate).toHaveBeenCalledTimes(1); // never entered the retry loop
expect(page.wait).not.toHaveBeenCalled();
});
// Genuinely empty profile: all retries are used and the empty snapshot is returned.
it('真·空号:走满重试后返回空快照(交给下游 EmptyResultError)', async () => {
const page = createPageMock(vi.fn().mockResolvedValue(EMPTY_GROUPS_SNAP));
const out = await readUserSnapshotHydrated(page, 3, 0.01);
expect(countFlatNotes(out)).toBe(0);
expect(isLoginWallSnapshot(out)).toBe(false);
expect(page.wait).toHaveBeenCalledTimes(3); // one wait per retry (maxRetries)
});
});

// End-to-end tests that run the registered `xiaohongshu/user` command against a
// mocked page whose `evaluate` returns scripted snapshots.
describe('xiaohongshu user command', () => {
const command = getRegistry().get('xiaohongshu/user');

// A login-wall snapshot must surface as AUTH_REQUIRED.
it('登录墙 → 抛 AUTH_REQUIRED(不再误报 Malformed/EMPTY)', async () => {
const page = createPageMock(vi.fn().mockResolvedValue(LOGIN_WALL_SNAP));
await expect(command.func(page, { id: '56d290df84edcd782a3c8748', limit: 5 })).rejects.toMatchObject({
code: 'AUTH_REQUIRED',
});
});

// Notes arrive only on the third read; the command should still return them.
it('笔记晚到 → 重试后成功返回(端到端回归)', async () => {
const page = createPageMock(vi
.fn()
.mockResolvedValueOnce(NO_STORE_SNAP)
.mockResolvedValueOnce(EMPTY_GROUPS_SNAP)
.mockResolvedValue(NOTES_SNAP));
const rows = await command.func(page, { id: 'someuser', limit: 5 });
expect(rows).toHaveLength(1);
expect(rows[0].id).toBe('aaa');
});

// The first read succeeds, then the read after one scroll hits the login wall.
it('滚动续读时被重定向到登录墙 → 抛 AUTH_REQUIRED', async () => {
const page = createPageMock(vi
.fn()
.mockResolvedValueOnce(snap({ noteGroups: [[noteEntry('aaa')]] }))
.mockResolvedValueOnce(LOGIN_WALL_SNAP));
await expect(command.func(page, { id: 'someuser', limit: 5 })).rejects.toMatchObject({
code: 'AUTH_REQUIRED',
});
expect(page.autoScroll).toHaveBeenCalledTimes(1);
});

// Every read returns empty tab groups, so the command ends with EMPTY_RESULT.
it('真·空号 → 抛 EMPTY_RESULT', async () => {
const page = createPageMock(vi.fn().mockResolvedValue(EMPTY_GROUPS_SNAP));
await expect(command.func(page, { id: 'emptyuser', limit: 5 })).rejects.toMatchObject({
code: 'EMPTY_RESULT',
});
});
});

// Checks that the existing assertReadableUserSnapshot contract still holds:
// a snapshot without the user store throws, a normal snapshot does not.
describe('assertReadableUserSnapshot (既有契约保持)', () => {
it('storePresent=false → Malformed: user store was not found', () => {
expect(() => assertReadableUserSnapshot(NO_STORE_SNAP)).toThrow(/user store was not found/);
});
// A well-formed snapshot passes without throwing.
it('正常快照不抛', () => {
expect(() => assertReadableUserSnapshot(NOTES_SNAP)).not.toThrow();
});
});