diff --git a/src/services/browser/UrlContentFetcher.ts b/src/services/browser/UrlContentFetcher.ts
index caf19ee83b1..f7111bec408 100644
--- a/src/services/browser/UrlContentFetcher.ts
+++ b/src/services/browser/UrlContentFetcher.ts
@@ -45,10 +45,14 @@ export class UrlContentFetcher {
 			return
 		}
 		const stats = await this.ensureChromiumExists()
+		const args = [
+			"--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
+		]
+		if (process.getuid && process.getuid() === 0) {
+			args.push("--no-sandbox")
+		}
 		this.browser = await stats.puppeteer.launch({
-			args: [
-				"--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
-			],
+			args,
 			executablePath: stats.executablePath,
 		})
 		// (latest version of puppeteer does not add headless to user agent)
diff --git a/src/services/browser/__tests__/UrlContentFetcher.test.ts b/src/services/browser/__tests__/UrlContentFetcher.test.ts
new file mode 100644
index 00000000000..52f9ea34f6a
--- /dev/null
+++ b/src/services/browser/__tests__/UrlContentFetcher.test.ts
@@ -0,0 +1,100 @@
+import * as vscode from "vscode"
+import { UrlContentFetcher } from "../UrlContentFetcher"
+import * as fs from "fs/promises" // Import the fs module
+
+// Mock PCR and puppeteer-core
+jest.mock("puppeteer-chromium-resolver", () => {
+	return jest.fn().mockResolvedValue({
+		puppeteer: {
+			launch: jest.fn().mockResolvedValue({
+				newPage: jest.fn().mockResolvedValue({
+					goto: jest.fn().mockResolvedValue(undefined),
+					content: jest.fn().mockResolvedValue("Mocked HTML"),
+				}),
+				close: jest.fn().mockResolvedValue(undefined),
+			}),
+		},
+		executablePath: "/mocked/path/to/chromium",
+	})
+})
+
+jest.mock("fs/promises", () => ({
+	...jest.requireActual("fs/promises"), // Import and retain default behavior
+	mkdir: jest.fn().mockResolvedValue(undefined), // Mock mkdir
+}))
+
+/**
+ * Resolves to the `launch` mock returned by the most recent call to the mocked PCR.
+ *
+ * PCR is mocked with `mockResolvedValue`, so every recorded result value is a
+ * promise; it must be awaited before `puppeteer.launch` can be read from it.
+ */
+async function getLaunchMock(): Promise<jest.Mock> {
+	const pcr = require("puppeteer-chromium-resolver") as jest.Mock
+	const lastResult = pcr.mock.results[pcr.mock.results.length - 1]
+	if (lastResult === undefined) {
+		throw new Error("puppeteer-chromium-resolver was not called")
+	}
+	const stats = await lastResult.value
+	return stats.puppeteer.launch
+}
+
+describe("UrlContentFetcher", () => {
+	const originalGetuid = process.getuid
+	let context: vscode.ExtensionContext
+	let urlContentFetcher: UrlContentFetcher
+
+	/** Makes process.getuid report `uid`; afterEach puts the original back. */
+	const mockUid = (uid: number): void => {
+		// Plain assignment instead of jest.spyOn, which throws when the property
+		// is missing (process.getuid is undefined on Windows).
+		process.getuid = () => uid
+	}
+
+	beforeEach(() => {
+		// Forget calls/results recorded by earlier tests; mock implementations stay.
+		jest.clearAllMocks()
+		context = {
+			globalStorageUri: { fsPath: "/mock/globalStoragePath" } as vscode.Uri,
+		} as vscode.ExtensionContext
+		urlContentFetcher = new UrlContentFetcher(context)
+	})
+
+	afterEach(async () => {
+		// Restore before anything that can throw, so a failing test never leaks the fake uid.
+		process.getuid = originalGetuid
+		await urlContentFetcher.closeBrowser()
+	})
+
+	it("should add --no-sandbox flag when running as root", async () => {
+		mockUid(0) // root
+
+		await urlContentFetcher.launchBrowser()
+
+		const launchMock = await getLaunchMock()
+		expect(launchMock).toHaveBeenLastCalledWith(
+			expect.objectContaining({
+				args: expect.arrayContaining(["--no-sandbox"]),
+			}),
+		)
+	})
+
+	it("should not add --no-sandbox flag when not running as root", async () => {
+		mockUid(1000) // any non-root uid
+
+		await urlContentFetcher.launchBrowser()
+
+		const launchMock = await getLaunchMock()
+		expect(launchMock).toHaveBeenLastCalledWith(
+			expect.objectContaining({
+				args: expect.not.arrayContaining(["--no-sandbox"]),
+			}),
+		)
+	})
+
+	it("should fetch and convert URL to markdown", async () => {
+		await urlContentFetcher.launchBrowser()
+		const markdown = await urlContentFetcher.urlToMarkdown("https://example.com")
+		expect(markdown).toBe("Mocked HTML") // Turndown would convert Mocked HTML to "Mocked HTML"
+	})
+})