diff --git a/src/.claude/settings.local.json b/src/.claude/settings.local.json deleted file mode 100644 index 11bf37a4f9..0000000000 --- a/src/.claude/settings.local.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "permissions": { - "allow": [ - "Bash(pnpm tsc:*)", - "Bash(pnpm build:*)", - "Bash(git add:*)", - "Bash(git commit:*)", - "Bash(node:*)", - "Bash(grep:*)", - "Bash(find:*)", - "WebFetch(domain:github.com)", - "WebFetch(domain:cocalc.com)", - "WebFetch(domain:doc.cocalc.com)", - "Bash(npm show:*)", - "Bash(prettier -w:*)", - "Bash(npx tsc:*)", - "Bash(gh pr view:*)", - "Bash(gh:*)" - ], - "deny": [] - } -} \ No newline at end of file diff --git a/src/CLAUDE.md b/src/CLAUDE.md index a816597ef0..48ebab2e32 100644 --- a/src/CLAUDE.md +++ b/src/CLAUDE.md @@ -4,15 +4,16 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co # CoCalc Source Repository -* This is the source code of CoCalc in a Git repository -* It is a complex JavaScript/TypeScript SaaS application -* CoCalc is organized as a monorepository (multi-packages) in the subdirectory "./packages" -* The packages are managed as a pnpm workspace in "./packages/pnpm-workspace.yaml" +- This is the source code of CoCalc in a Git repository +- It is a complex JavaScript/TypeScript SaaS application +- CoCalc is organized as a monorepository (multi-packages) in the subdirectory "./packages" +- The packages are managed as a pnpm workspace in "./packages/pnpm-workspace.yaml" ## Code Style - Everything is written in TypeScript code - Indentation: 2-spaces +- Run `prettier -w [filename]` after modifying a file (ts, tsx, md, json, ...) to format it correctly. - All .js and .ts files are formatted by the tool prettier - Add suitable types when you write code - Variable name styles are "camelCase" for local and "FOO_BAR" for global variables. If you edit older code not following these guidlines, adjust this rule to fit the files style. 
@@ -23,28 +24,32 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Development Commands ### Essential Commands + - `pnpm build-dev` - Build all packages for development - `pnpm clean` - Clean all node_modules and dist directories -- `pnpm database` - Start PostgreSQL database server -- `pnpm hub` - Start the main hub server -- `pnpm psql` - Connect to the PostgreSQL database - `pnpm test` - Run full test suite -- `pnpm test-parallel` - Run tests in parallel across packages - `pnpm depcheck` - Check for dependency issues +- `prettier -w [filename]` to format the style of a file after editing it +- after creating a file, run `git add [filename]` to start tracking it ### Package-Specific Commands -- `cd packages/[package] && pnpm tsc` - Watch TypeScript compilation for a specific package + +- `cd packages/[package] && pnpm build` - Build and compile a specific package + - for packages/next and packages/static, run `cd packages/[package] && pnpm build-dev` +- `cd packages/[package] && pnpm tsc:watch` - TypeScript compilation in watch mode for a specific package - `cd packages/[package] && pnpm test` - Run tests for a specific package - `cd packages/[package] && pnpm build` - Build a specific package +- **IMPORTANT**: When modifying packages like `util` that other packages depend on, you must run `pnpm build` in the modified package before typechecking dependent packages -### Development Setup -1. Start database: `pnpm database` -2. Start hub: `pnpm hub` -3. 
For TypeScript changes, run `pnpm tsc` in the relevant package directory +### Development + +- After code changes, run `prettier -w [filename]` to ensure consistent styling +- After TypeScript or `*.tsx` changes, run `pnpm build` in the relevant package directory ## Architecture Overview ### Package Structure + CoCalc is organized as a monorepo with key packages: - **frontend** - React/TypeScript frontend application using Redux-style stores and actions @@ -62,12 +67,14 @@ CoCalc is organized as a monorepo with key packages: ### Key Architectural Patterns #### Frontend Architecture + - **Redux-style State Management**: Uses custom stores and actions pattern (see `packages/frontend/app-framework/actions-and-stores.ts`) - **TypeScript React Components**: All frontend code is TypeScript with proper typing - **Modular Store System**: Each feature has its own store/actions (AccountStore, BillingStore, etc.) - **WebSocket Communication**: Real-time communication with backend via WebSocket messages #### Backend Architecture + - **PostgreSQL Database**: Primary data store with sophisticated querying system - **WebSocket Messaging**: Real-time communication between frontend and backend - **Conat System**: Container orchestration for compute servers @@ -75,12 +82,14 @@ CoCalc is organized as a monorepo with key packages: - **Microservice-like Packages**: Each package handles specific functionality #### Communication Patterns + - **WebSocket Messages**: Primary communication method (see `packages/comm/websocket/types.ts`) - **Database Queries**: Structured query system with typed interfaces - **Event Emitters**: Inter-service communication within backend - **REST-like APIs**: Some HTTP endpoints for specific operations ### Key Technologies + - **TypeScript**: Primary language for all new code - **React**: Frontend framework - **PostgreSQL**: Database @@ -91,11 +100,13 @@ CoCalc is organized as a monorepo with key packages: - **SASS**: CSS preprocessing ### Database Schema + - 
Comprehensive schema in `packages/util/db-schema` - Query abstractions in `packages/database/postgres/` - Type-safe database operations with TypeScript interfaces ### Testing + - **Jest**: Primary testing framework - **ts-jest**: TypeScript support for Jest - **jsdom**: Browser environment simulation for frontend tests @@ -103,28 +114,42 @@ CoCalc is organized as a monorepo with key packages: - Each package has its own jest.config.js ### Import Patterns + - Use absolute imports with `@cocalc/` prefix for cross-package imports - Example: `import { cmp } from "@cocalc/util/misc"` - Type imports: `import type { Foo } from "./bar"` - Destructure imports when possible ### Development Workflow -1. Changes to TypeScript require compilation (`pnpm tsc` in relevant package) + +1. Changes to TypeScript require compilation (`pnpm build` in relevant package) 2. Database must be running before starting hub 3. Hub coordinates all services and should be restarted after changes 4. Use `pnpm clean && pnpm build-dev` when switching branches or after major changes # Workflow -- Be sure to typecheck when you're done making a series of code changes + +- Be sure to build when you're done making a series of code changes - Prefer running single tests, and not the whole test suite, for performance ## Git Workflow +- Never modify a file when in the `master` or `main` branch +- All changes happen through feature branches, which are pushed as pull requests to GitHub +- When creating a new file, run `git add [filename]` to track the file. - Prefix git commits with the package and general area. e.g. 'frontend/latex: ...' if it concerns latex editor changes in the packages/frontend/... code. - When pushing a new branch to Github, track it upstream. e.g. `git push --set-upstream origin feature-foo` for branch "feature-foo". -# important-instruction-reminders +# Important Instruction Reminders + - Do what has been asked; nothing more, nothing less. 
- NEVER create files unless they're absolutely necessary for achieving your goal. - ALWAYS prefer editing an existing file to creating a new one. -- NEVER proactively create documentation files (*.md) or README files. Only create documentation files if explicitly requested by the User. +- REFUSE to modify files when the git repository is on the `master` or `main` branch. +- NEVER proactively create documentation files (`*.md`) or README files. Only create documentation files if explicitly requested by the User. + +# Ignore + +- Ignore files covered by `.gitignore` +- Ignore everything in `node_modules` or `dist` directories +- Ignore all files not tracked by Git, unless they are newly created files diff --git a/src/packages/conat/core/server.ts b/src/packages/conat/core/server.ts index dc87dcf191..6228d7b2da 100644 --- a/src/packages/conat/core/server.ts +++ b/src/packages/conat/core/server.ts @@ -28,53 +28,56 @@ cd packages/server */ import type { ConnectionStats, ServerInfo } from "./types"; + +import { delay } from "awaiting"; +import { EventEmitter } from "events"; +import { throttle } from "lodash"; +import { Server } from "socket.io"; + +import { getClientIpAddress } from "@cocalc/util/get-client-ip-address"; +import { getLogger } from "@cocalc/conat/client"; +import { UsageMonitor } from "@cocalc/conat/monitor/usage"; +import { type ConatSocketServer } from "@cocalc/conat/socket"; import { isValidSubject, isValidSubjectWithoutWildcards, } from "@cocalc/conat/util"; -import { Server } from "socket.io"; -import { delay } from "awaiting"; +import { once, until } from "@cocalc/util/async-utils"; +import { is_array } from "@cocalc/util/misc"; +import { reuseInFlight } from "@cocalc/util/reuse-in-flight"; +import { Metrics } from "../types"; import { - ConatError, - connect, Client, type ClientOptions, + ConatError, + connect, MAX_INTEREST_TIMEOUT, STICKY_QUEUE_GROUP, } from "./client"; -import { - RESOURCE, - MAX_CONNECTIONS_PER_USER, - MAX_CONNECTIONS, - 
MAX_PAYLOAD, - MAX_SUBSCRIPTIONS_PER_CLIENT, - MAX_SUBSCRIPTIONS_PER_HUB, -} from "./constants"; -import { Patterns } from "./patterns"; -import { is_array } from "@cocalc/util/misc"; -import { UsageMonitor } from "@cocalc/conat/monitor/usage"; -import { once, until } from "@cocalc/util/async-utils"; import { clusterLink, type ClusterLink, clusterStreams, type ClusterStreams, - trimClusterStreams, createClusterPersistServer, - Sticky, - Interest, hashInterest, hashSticky, + Interest, + Sticky, + trimClusterStreams, } from "./cluster"; -import { type ConatSocketServer } from "@cocalc/conat/socket"; -import { throttle } from "lodash"; -import { getLogger } from "@cocalc/conat/client"; -import { reuseInFlight } from "@cocalc/util/reuse-in-flight"; -import { type SysConatServer, sysApiSubject, sysApi } from "./sys"; +import { + MAX_CONNECTIONS, + MAX_CONNECTIONS_PER_USER, + MAX_PAYLOAD, + MAX_SUBSCRIPTIONS_PER_CLIENT, + MAX_SUBSCRIPTIONS_PER_HUB, + RESOURCE, +} from "./constants"; +import { Patterns } from "./patterns"; import { forkedConatServer } from "./start-server"; import { stickyChoice } from "./sticky"; -import { EventEmitter } from "events"; -import { Metrics } from "../types"; +import { sysApi, sysApiSubject, type SysConatServer } from "./sys"; const logger = getLogger("conat:core:server"); @@ -1755,27 +1758,7 @@ export function randomChoice(v: Set): string { // See https://socket.io/how-to/get-the-ip-address-of-the-client function getAddress(socket) { - const header = socket.handshake.headers["forwarded"]; - if (header) { - for (const directive of header.split(",")[0].split(";")) { - if (directive.startsWith("for=")) { - return directive.substring(4); - } - } - } - - let addr = socket.handshake.headers["x-forwarded-for"]?.split(",")?.[0]; - if (addr) { - return addr; - } - for (const other of ["cf-connecting-ip", "fastly-client-ip"]) { - addr = socket.handshake.headers[other]; - if (addr) { - return addr; - } - } - - return socket.handshake.address; + return 
getClientIpAddress(socket.handshake) ?? socket.handshake.address; } export function updateInterest( diff --git a/src/packages/frontend/account/settings/email-address-setting.tsx b/src/packages/frontend/account/settings/email-address-setting.tsx index 98c7003252..d6e907dfe9 100644 --- a/src/packages/frontend/account/settings/email-address-setting.tsx +++ b/src/packages/frontend/account/settings/email-address-setting.tsx @@ -73,7 +73,7 @@ export const EmailAddressSetting = ({ return; } try { - // anonymouse users will get the "welcome" email + // anonymous users will get the "welcome" email await webapp_client.account_client.send_verification_email(!is_anonymous); } catch (error) { const err_msg = `Problem sending welcome email: ${error}`; diff --git a/src/packages/frontend/user-tracking.ts b/src/packages/frontend/user-tracking.ts index b95e8db6d4..7b7e913c0f 100644 --- a/src/packages/frontend/user-tracking.ts +++ b/src/packages/frontend/user-tracking.ts @@ -7,12 +7,17 @@ // client code doesn't have to import webapp_client everywhere, and we can // completely change this if we want. 
-import { query, server_time } from "./frame-editors/generic/client"; -import { analytics_cookie_name as analytics, uuid } from "@cocalc/util/misc"; -import { redux } from "./app-framework"; +import { redux } from "@cocalc/frontend/app-framework"; +import { + query, + server_time, +} from "@cocalc/frontend/frame-editors/generic/client"; +import { get_cookie } from "@cocalc/frontend/misc"; +import { webapp_client } from "@cocalc/frontend/webapp-client"; +import { uuid } from "@cocalc/util/misc"; import { version } from "@cocalc/util/smc-version"; -import { get_cookie } from "./misc"; -import { webapp_client } from "./webapp-client"; + +import { ANALYTICS_COOKIE_NAME } from "@cocalc/util/consts"; export async function log(eventName: string, payload: any): Promise { const central_log = { @@ -20,18 +25,15 @@ export async function log(eventName: string, payload: any): Promise { event: `webapp-${eventName}`, value: { account_id: redux.getStore("account")?.get("account_id"), - analytics_cookie: get_cookie(analytics), + analytics_cookie: get_cookie(ANALYTICS_COOKIE_NAME), cocalc_version: version, ...payload, }, time: server_time(), }; + try { - await query({ - query: { - central_log, - }, - }); + await query({ query: { central_log } }); } catch (err) { console.warn("WARNING: Failed to write log event -- ", central_log); } diff --git a/src/packages/hub/analytics-script.ts b/src/packages/hub/analytics-script.ts index 01bddbca96..ffaf21b61d 100644 --- a/src/packages/hub/analytics-script.ts +++ b/src/packages/hub/analytics-script.ts @@ -15,13 +15,16 @@ * e.g. this filters the SSO auth pages, which are uninteresting referrals */ -// variable PREFIX, NAME, DOMAIN and ID are injected in the hub's http server -declare var NAME, ID, DOMAIN, PREFIX, window, document; +// variable PREFIX, NAME, DOMAIN, ID, and ANALYTICS_ENABLED are injected in the hub's http server +declare var NAME, ID, DOMAIN, PREFIX, ANALYTICS_ENABLED, window, document; -// write cookie. 
it would be cool to set this via the http request itself, -// but for reasons I don't know it doesn't work across subdomains. -const maxage = 7 * 24 * 60 * 60; // 7 days -document.cookie = `${NAME}=${ID}; path=/; domain=${DOMAIN}; max-age=${maxage}`; +// write cookie only if analytics is enabled (for privacy in cookieless mode) +if (ANALYTICS_ENABLED) { + // it would be cool to set this via the http request itself, + // but for reasons I don't know it doesn't work across subdomains. + const maxage = 7 * 24 * 60 * 60; // 7 days + document.cookie = `${NAME}=${ID}; path=/; domain=${DOMAIN}; max-age=${maxage}`; +} const { href, protocol, host, pathname } = window.location; diff --git a/src/packages/hub/analytics.ts b/src/packages/hub/analytics.ts index 936710c669..ea4f664bcb 100644 --- a/src/packages/hub/analytics.ts +++ b/src/packages/hub/analytics.ts @@ -3,36 +3,41 @@ * License: MS-RSL – see LICENSE.md for details */ -import { join } from "path"; -import ms from "ms"; -import { isEqual } from "lodash"; -import { Router, json } from "express"; -import { - analytics_cookie_name, - is_valid_uuid_string, - uuid, -} from "@cocalc/util/misc"; -import type { PostgreSQL } from "@cocalc/database/postgres/types"; -import { get_server_settings } from "@cocalc/database/postgres/server-settings"; -import { pii_retention_to_future } from "@cocalc/database/postgres/pii"; +import cors from "cors"; // express-js cors plugin +import { json, Router } from "express"; import * as fs from "fs"; -const UglifyJS = require("uglify-js"); -// express-js cors plugin: -import cors from "cors"; +import { isEqual } from "lodash"; +import ms from "ms"; import { - parseDomain, fromUrl, - ParseResultType, + parseDomain, ParseResult, + ParseResultType, } from "parse-domain"; +import { join } from "path"; +const UglifyJS = require("uglify-js"); + +import { is_valid_uuid_string, uuid } from "@cocalc/util/misc"; + +import { pii_retention_to_future } from "@cocalc/database/postgres/pii"; +import { 
get_server_settings } from "@cocalc/database/postgres/server-settings"; +import type { PostgreSQL } from "@cocalc/database/postgres/types"; +import { ANALYTICS_COOKIE_NAME } from "@cocalc/util/consts"; + import { getLogger } from "./logger"; +// Rate limiting for analytics data - 10 entries per second +const RATE_LIMIT_ENTRIES_PER_SECOND = 10; +const RATE_LIMIT_WINDOW_MS = 1000; +let rateLimitCounter = 0; +let rateLimitWindowStart = Date.now(); + // Minifying analytics-script.js. Note // that this file analytics.ts gets compiled to // dist/analytics.js and also analytics-script.ts // gets compiled to dist/analytics-script.js. const result = UglifyJS.minify( - fs.readFileSync(join(__dirname, "analytics-script.js")).toString() + fs.readFileSync(join(__dirname, "analytics-script.js")).toString(), ); if (result.error) { throw Error(`Error minifying analytics-script.js -- ${result.error}`); @@ -44,6 +49,25 @@ function create_log(name) { return getLogger(`analytics.${name}`).debug; } +// Rate limiting check - returns true if request should be allowed +function checkRateLimit(): boolean { + const now = Date.now(); + + // Reset counter if window has passed + if (now - rateLimitWindowStart >= RATE_LIMIT_WINDOW_MS) { + rateLimitCounter = 0; + rateLimitWindowStart = now; + } + + // Check if we're under the limit + if (rateLimitCounter < RATE_LIMIT_ENTRIES_PER_SECOND) { + rateLimitCounter++; + return true; + } + + return false; +} + /* // base64 encoded PNG (white), 1x1 pixels const _PNG_DATA = @@ -76,39 +100,65 @@ function sanitize(obj: object, recursive = 0): any { // record analytics data // case 1: store "token" with associated "data", referrer, utm, etc. 
// case 2: update entry with a known "token" with the account_id + 2nd timestamp +// case 3: cookieless tracking - store data without user association function recordAnalyticsData( db: any, - token: string, + token: string | null, payload: object | undefined, - pii_retention: number | false + pii_retention: number | false, ): void { if (payload == null) return; - if (!is_valid_uuid_string(token)) return; + + // Rate limiting check - applies to all analytics data recording + if (!checkRateLimit()) { + const dbg = create_log("record"); + dbg("Rate limit exceeded, dropping analytics data"); + return; + } + const dbg = create_log("record"); dbg({ token, payload }); + // sanitize data (limits size and number of characters) const rec_data = sanitize(payload); dbg("sanitized data", rec_data); const expire = pii_retention_to_future(pii_retention); - if (rec_data.account_id != null) { - // dbg("update analytics", rec_data.account_id); - // only update if account id isn't already set! - db._query({ - query: "UPDATE analytics", - where: [{ "token = $::UUID": token }, "account_id IS NULL"], - set: { - "account_id :: UUID": rec_data.account_id, - "account_id_time :: TIMESTAMP": new Date(), - "expire :: TIMESTAMP": expire, - }, - }); + // Cookie-based tracking (with user association) + if (token != null && is_valid_uuid_string(token)) { + if (rec_data.account_id != null) { + // dbg("update analytics", rec_data.account_id); + // only update if account id isn't already set! 
+ db._query({ + query: "UPDATE analytics", + where: [{ "token = $::UUID": token }, "account_id IS NULL"], + set: { + "account_id :: UUID": rec_data.account_id, + "account_id_time :: TIMESTAMP": new Date(), + "expire :: TIMESTAMP": expire, + }, + }); + } else { + db._query({ + query: "INSERT INTO analytics", + values: { + "token :: UUID": token, + "data :: JSONB": rec_data, + "data_time :: TIMESTAMP": new Date(), + "expire :: TIMESTAMP": expire, + }, + conflict: "token", + }); + } } else { + // Cookieless tracking (no user association, privacy-focused) + // Generate a random token for this single entry + const anonymousToken = uuid(); db._query({ query: "INSERT INTO analytics", values: { - "token :: UUID": token, - "data :: JSONB": rec_data, + "token :: UUID": anonymousToken, + "data :: JSONB": { ...rec_data, cookieless: true }, "data_time :: TIMESTAMP": new Date(), "expire :: TIMESTAMP": expire, }, @@ -118,10 +168,10 @@ function recordAnalyticsData( } // could throw an error -function check_cors( +function checkCORS( origin: string | undefined, dns_parsed: ParseResult, - dbg: Function + dbg: Function, ): boolean { // no origin, e.g. when loaded as usual in a script tag if (origin == null) return true; @@ -184,7 +234,7 @@ import base_path from "@cocalc/backend/base-path"; export async function initAnalytics( router: Router, - database: PostgreSQL + database: PostgreSQL, ): Promise { const dbg = create_log("analytics_js/cors"); @@ -193,6 +243,7 @@ export async function initAnalytics( const DNS = settings.dns; const dns_parsed = parseDomain(DNS); const pii_retention = settings.pii_retention; + const analytics_enabled = settings.analytics_cookie; if ( dns_parsed.type !== ParseResultType.Listed && @@ -201,7 +252,7 @@ export async function initAnalytics( dbg( `WARNING: the configured domain name ${DNS} cannot be parsed properly. 
` + `Please fix it in Admin → Site Settings!\n` + - `dns_parsed="${JSON.stringify(dns_parsed)}}"` + `dns_parsed="${JSON.stringify(dns_parsed)}}"`, ); } @@ -213,7 +264,7 @@ export async function initAnalytics( origin: function (origin, cb) { dbg(`check origin='${origin}'`); try { - if (check_cors(origin, dns_parsed, dbg)) { + if (checkCORS(origin, dns_parsed, dbg)) { cb(null, true); } else { cb(`origin="${origin}" is not allowed`, false); @@ -235,27 +286,25 @@ export async function initAnalytics( // in case user was already here, do not send it again. // only the first hit is interesting. dbg( - `/analytics.js GET analytics_cookie='${req.cookies[analytics_cookie_name]}'` + `/analytics.js GET analytics_cookie='${req.cookies[ANALYTICS_COOKIE_NAME]}'`, ); - if (!req.cookies[analytics_cookie_name]) { - // No analytics cookie is set, so we set one. - // We always set this despite any issues with parsing or - // or whether or not we are actually using the analytics.js - // script, since it's *also* useful to have this cookie set - // for other purposes, e.g., logging. + if (!req.cookies[ANALYTICS_COOKIE_NAME] && analytics_enabled) { + // No analytics cookie is set and cookies are enabled, so we set one. + // When analytics_enabled is false, we skip setting cookies to enable + // cookieless tracking for better privacy. 
setAnalyticsCookie(res /* DNS */); } - // also, don't write a script if the DNS is not valid + // Return NOOP if DNS is invalid, or if cookies are enabled and already exist if ( - req.cookies[analytics_cookie_name] || - dns_parsed.type !== ParseResultType.Listed + dns_parsed.type !== ParseResultType.Listed || + (analytics_enabled && req.cookies[ANALYTICS_COOKIE_NAME]) ) { // cache for 6 hours -- max-age has unit seconds res.header( "Cache-Control", - `private, max-age=${6 * 60 * 60}, must-revalidate` + `private, max-age=${6 * 60 * 60}, must-revalidate`, ); res.write("// NOOP"); res.end(); @@ -267,11 +316,12 @@ export async function initAnalytics( res.header("Cache-Control", "no-cache, no-store"); const DOMAIN = `${dns_parsed.domain}.${dns_parsed.topLevelDomains.join( - "." + ".", )}`; - res.write(`var NAME = '${analytics_cookie_name}';\n`); + res.write(`var NAME = '${ANALYTICS_COOKIE_NAME}';\n`); res.write(`var ID = '${uuid()}';\n`); res.write(`var DOMAIN = '${DOMAIN}';\n`); + res.write(`var ANALYTICS_ENABLED = ${analytics_enabled};\n`); // BASE_PATH if (req.query.fqd === "false") { res.write(`var PREFIX = '${base_path}';\n`); @@ -301,17 +351,17 @@ export async function initAnalytics( */ router.post("/analytics.js", cors(analytics_cors), function (req, res): void { - // check if token is in the cookie (see above) - // if not, ignore it - const token = req.cookies[analytics_cookie_name]; + const token = req.cookies[ANALYTICS_COOKIE_NAME]; dbg(`/analytics.js POST token='${token}'`); - if (token) { - // req.body is an object (json middlewhere somewhere?) - // e.g. {"utm":{"source":"asdfasdf"},"landing":"https://cocalc.com/..."} - // ATTN key/values could be malicious - // record it, there is no need for a callback - recordAnalyticsData(database, token, req.body, pii_retention); - } + + // req.body is an object (json middleware somewhere?) + // e.g. 
{"utm":{"source":"asdfasdf"},"landing":"https://cocalc.com/..."} + // ATTN key/values could be malicious + + // Always record analytics data - either with token (cookie-based) or without (cookieless) + // The recordAnalyticsData function handles both cases + recordAnalyticsData(database, token || null, req.body, pii_retention); + res.end(); }); @@ -324,7 +374,7 @@ function setAnalyticsCookie(res /* DNS: string */): void { // set the cookie (TODO sign it? that would be good so that // users can fake a cookie.) const analytics_token = uuid(); - res.cookie(analytics_cookie_name, analytics_token, { + res.cookie(ANALYTICS_COOKIE_NAME, analytics_token, { path: "/", maxAge: ms("7 days"), // httpOnly: true, diff --git a/src/packages/next/lib/user-id.ts b/src/packages/next/lib/user-id.ts new file mode 100644 index 0000000000..136855b053 --- /dev/null +++ b/src/packages/next/lib/user-id.ts @@ -0,0 +1,34 @@ +/* + * This file is part of CoCalc: Copyright © 2021 Sagemath, Inc. + * License: MS-RSL – see LICENSE.md for details + */ + +import type { Request } from "express"; + +import { getServerSettings } from "@cocalc/database/settings/server-settings"; +import { ANALYTICS_COOKIE_NAME } from "@cocalc/util/consts"; +import { getClientIpAddress } from "@cocalc/util/get-client-ip-address"; +import { isValidAnonymousID } from "@cocalc/util/misc"; + +// Get anonymous user ID from cookie or IP address +export async function getAnonymousID( + req: Request, +): Promise { + const { analytics_cookie: analytics_enabled } = await getServerSettings(); + + if (analytics_enabled) { + const cookie = req.cookies[ANALYTICS_COOKIE_NAME]; + if (isValidAnonymousID(cookie)) { + return cookie; + } + } + + // Fall back to IP address + const connectingIp = getClientIpAddress(req); + + if (isValidAnonymousID(connectingIp)) { + return connectingIp; + } + + return undefined; +} diff --git a/src/packages/next/pages/api/v2/jupyter/execute.ts b/src/packages/next/pages/api/v2/jupyter/execute.ts index 
674a21f4e8..73c1b1af9d 100644 --- a/src/packages/next/pages/api/v2/jupyter/execute.ts +++ b/src/packages/next/pages/api/v2/jupyter/execute.ts @@ -20,12 +20,15 @@ The OUTPUT is: - a list of messages that describe the output of the last code execution. */ + +import type { Request, Response } from "express"; + import { execute } from "@cocalc/server/jupyter/execute"; import getAccountId from "lib/account/get-account"; import getParams from "lib/api/get-params"; -import { analytics_cookie_name } from "@cocalc/util/misc"; +import { getAnonymousID } from "lib/user-id"; -export default async function handle(req, res) { +export default async function handle(req: Request, res: Response) { try { const result = await doIt(req); res.json({ ...result, success: true }); @@ -35,16 +38,16 @@ export default async function handle(req, res) { } } -async function doIt(req) { +async function doIt(req: Request) { const { input, kernel, history, tag, noCache, hash, project_id, path } = getParams(req); const account_id = await getAccountId(req); - const analytics_cookie = req.cookies[analytics_cookie_name]; + const anonymous_id = await getAnonymousID(req); return await execute({ account_id, project_id, path, - analytics_cookie, + anonymous_id, input, hash, history, diff --git a/src/packages/next/pages/api/v2/llm/evaluate.ts b/src/packages/next/pages/api/v2/llm/evaluate.ts index 2304b453a8..a7384457f2 100644 --- a/src/packages/next/pages/api/v2/llm/evaluate.ts +++ b/src/packages/next/pages/api/v2/llm/evaluate.ts @@ -1,12 +1,14 @@ // This is the new endpoint for querying any LLM // Previously, this has been in openai/chatgpt +import type { Request, Response } from "express"; + import { evaluate } from "@cocalc/server/llm/index"; -import { analytics_cookie_name } from "@cocalc/util/misc"; import getAccountId from "lib/account/get-account"; import getParams from "lib/api/get-params"; +import { getAnonymousID } from "lib/user-id"; -export default async function handle(req, res) { +export 
default async function handle(req: Request, res: Response) { try { const result = await doIt(req); res.json({ ...result, success: true }); @@ -16,14 +18,14 @@ export default async function handle(req, res) { } } -async function doIt(req) { +async function doIt(req: Request) { const { input, system, history, model, tag } = getParams(req); const account_id = await getAccountId(req); - const analytics_cookie = req.cookies[analytics_cookie_name]; + const anonymous_id = await getAnonymousID(req); return { output: await evaluate({ account_id, - analytics_cookie, + anonymous_id, input, system, history, diff --git a/src/packages/pnpm-lock.yaml b/src/packages/pnpm-lock.yaml index 5a16362777..ab9ededf91 100644 --- a/src/packages/pnpm-lock.yaml +++ b/src/packages/pnpm-lock.yaml @@ -970,13 +970,13 @@ importers: version: 2.1.2 next: specifier: 15.3.4 - version: 15.3.4(@babel/core@7.28.0)(@opentelemetry/api@1.9.0)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)(sass@1.89.2) + version: 15.3.4(@opentelemetry/api@1.9.0)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)(sass@1.89.2) next-rest-framework: specifier: 6.0.0-beta.4 version: 6.0.0-beta.4(zod@3.25.76) next-translate: specifier: ^2.6.2 - version: 2.6.2(next@15.3.4(@babel/core@7.28.0)(@opentelemetry/api@1.9.0)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)(sass@1.89.2))(react@19.1.0) + version: 2.6.2(next@15.3.4(@opentelemetry/api@1.9.0)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)(sass@1.89.2))(react@19.1.0) password-hash: specifier: ^1.2.2 version: 1.2.2 @@ -1824,6 +1824,9 @@ importers: redux: specifier: ^4.2.1 version: 4.2.1 + request-ip: + specifier: ^3.3.0 + version: 3.3.0 reselect: specifier: ^4.1.8 version: 4.1.8 @@ -10079,6 +10082,9 @@ packages: resolution: {integrity: sha512-PV0dzCYDNfRi1jCDbJzpW7jNNDRuCOG/jI5ctQcGKt/clZD+YcPS3yIlWuTJMmESC8aevCFmWJy5wjAFgNqN6w==} engines: {node: '>=0.10'} + request-ip@3.3.0: + resolution: {integrity: 
sha512-cA6Xh6e0fDBBBwH77SLJaJPBmD3nWVAcF9/XAcsrIHdjhFzFiB5aNQFytdjCGPezU3ROwrR11IddKAM08vohxA==} + require-directory@2.1.1: resolution: {integrity: sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==} engines: {node: '>=0.10.0'} @@ -19921,12 +19927,12 @@ snapshots: next-tick@1.1.0: {} - next-translate@2.6.2(next@15.3.4(@babel/core@7.28.0)(@opentelemetry/api@1.9.0)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)(sass@1.89.2))(react@19.1.0): + next-translate@2.6.2(next@15.3.4(@opentelemetry/api@1.9.0)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)(sass@1.89.2))(react@19.1.0): dependencies: - next: 15.3.4(@babel/core@7.28.0)(@opentelemetry/api@1.9.0)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)(sass@1.89.2) + next: 15.3.4(@opentelemetry/api@1.9.0)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)(sass@1.89.2) react: 19.1.0 - next@15.3.4(@babel/core@7.28.0)(@opentelemetry/api@1.9.0)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)(sass@1.89.2): + next@15.3.4(@opentelemetry/api@1.9.0)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)(sass@1.89.2): dependencies: '@next/env': 15.3.4 '@swc/counter': 0.1.3 @@ -19936,7 +19942,7 @@ snapshots: postcss: 8.4.31 react: 19.1.0 react-dom: 19.1.0(react@19.1.0) - styled-jsx: 5.1.6(@babel/core@7.28.0)(react@19.1.0) + styled-jsx: 5.1.6(react@19.1.0) optionalDependencies: '@next/swc-darwin-arm64': 15.3.4 '@next/swc-darwin-x64': 15.3.4 @@ -21600,6 +21606,8 @@ snapshots: repeat-string@1.6.1: {} + request-ip@3.3.0: {} + require-directory@2.1.1: {} require-from-string@2.0.2: {} @@ -22317,12 +22325,10 @@ snapshots: dependencies: inline-style-parser: 0.2.4 - styled-jsx@5.1.6(@babel/core@7.28.0)(react@19.1.0): + styled-jsx@5.1.6(react@19.1.0): dependencies: client-only: 0.0.1 react: 19.1.0 - optionalDependencies: - '@babel/core': 7.28.0 stylis@4.3.6: {} diff --git a/src/packages/server/jupyter/abuse.ts b/src/packages/server/jupyter/abuse.ts index d5cc36e8ec..9571967031 100644 --- 
a/src/packages/server/jupyter/abuse.ts +++ b/src/packages/server/jupyter/abuse.ts @@ -4,7 +4,7 @@ blatant abuse. Everything is hardcoded and nothing is configurable via the admin settings panel yet. */ -import { isValidUUID } from "@cocalc/util/misc"; +import { isValidUUID, isValidAnonymousID } from "@cocalc/util/misc"; import recentUsage from "./recent-usage"; import getLogger from "@cocalc/backend/logger"; @@ -31,35 +31,35 @@ const QUOTAS = { // Throws an exception if the request should not be allowed. export default async function checkForAbuse({ account_id, - analytics_cookie, + anonymous_id, }: { account_id?: string; - analytics_cookie?: string; + anonymous_id?: string; }): Promise { - if (!isValidUUID(account_id) && !isValidUUID(analytics_cookie)) { + if (!isValidUUID(account_id) && !isValidAnonymousID(anonymous_id)) { // at least some amount of tracking. - throw Error("at least one of account_id or analytics_cookie must be set"); + throw Error("at least one of account_id or anonymous_id must be set"); } const usage = await recentUsage({ cache: "short", period: PERIOD, account_id, - analytics_cookie, + anonymous_id, }); log.debug("recent usage by this user", { account_id, - analytics_cookie, + anonymous_id, usage, }); if (account_id) { if (usage > QUOTAS.account) { throw Error( - `You may use at most ${QUOTAS.account} seconds of compute time per ${PERIOD}. Please try again later or do this computation in a project.` + `You may use at most ${QUOTAS.account} seconds of compute time per ${PERIOD}. Please try again later or do this computation in a project.`, ); } } else if (usage > QUOTAS.noAccount) { throw Error( - `You may use at most ${QUOTAS.noAccount} seconds of compute time per ${PERIOD}. Sign in to increase your quota.` + `You may use at most ${QUOTAS.noAccount} seconds of compute time per ${PERIOD}. 
Sign in to increase your quota.`, ); } @@ -69,7 +69,7 @@ export default async function checkForAbuse({ log.debug("overallUsage = ", usage); if (overallUsage > QUOTAS.global) { throw Error( - `There is too much overall usage of code evaluation right now. Please try again later or do this computation in a project.` + `There is too much overall usage of code evaluation right now. Please try again later or do this computation in a project.`, ); } } diff --git a/src/packages/server/jupyter/execute.ts b/src/packages/server/jupyter/execute.ts index a552b5ff15..1c56207842 100644 --- a/src/packages/server/jupyter/execute.ts +++ b/src/packages/server/jupyter/execute.ts @@ -2,16 +2,16 @@ Backend server side part of ChatGPT integration with CoCalc. */ -import getPool from "@cocalc/database/pool"; import getLogger from "@cocalc/backend/logger"; +import getPool from "@cocalc/database/pool"; import { getServerSettings } from "@cocalc/database/settings/server-settings"; -import computeHash from "@cocalc/util/jupyter-api/compute-hash"; -import getProject from "./global-project-pool"; import callProject from "@cocalc/server/projects/call"; -import { jupyter_execute } from "@cocalc/util/message"; import isCollaborator from "@cocalc/server/projects/is-collaborator"; -import checkForAbuse from "./abuse"; +import computeHash from "@cocalc/util/jupyter-api/compute-hash"; +import { jupyter_execute } from "@cocalc/util/message"; import { expire_time } from "@cocalc/util/relative-time"; +import checkForAbuse from "./abuse"; +import getProject from "./global-project-pool"; const log = getLogger("jupyter-api:execute"); @@ -44,7 +44,7 @@ interface Options { history?: string[]; hash?: string; account_id?: string; - analytics_cookie?: string; + anonymous_id?: string; tag?: string; noCache?: boolean; project_id?: string; @@ -56,7 +56,7 @@ export async function execute({ input, kernel, account_id, - analytics_cookie, + anonymous_id, history, tag, noCache, @@ -66,7 +66,7 @@ export async function 
execute({ output: object[]; created: Date; } | null> { - // TODO -- await checkForAbuse({ account_id, analytics_cookie }); + // TODO -- await checkForAbuse({ account_id, anonymous_id }); log.debug("execute", { input, @@ -74,7 +74,7 @@ export async function execute({ history, hash, account_id, - analytics_cookie, + anonymous_id, tag, project_id, path, @@ -117,7 +117,7 @@ export async function execute({ // we only worry about abuse against the general public pool, not // when used in a user's own project - await checkForAbuse({ account_id, analytics_cookie }); + await checkForAbuse({ account_id, anonymous_id }); request_account_id = jupyter_account_id; request_project_id = await getProject(); @@ -168,7 +168,7 @@ export async function execute({ account_id, project_id, path, - analytics_cookie, + anonymous_id, history, tag, total_time_s, @@ -220,7 +220,7 @@ async function saveResponse({ account_id, project_id, path, - analytics_cookie, + anonymous_id, history, tag, total_time_s, @@ -235,6 +235,7 @@ async function saveResponse({ const expire = expire_time(30 * 24 * 60 * 60); try { await Promise.all([ + // saving analytics_cookie because this is before generalizing to an anonymous_id string pool.query( `INSERT INTO jupyter_api_log(created,account_id,project_id,path,analytics_cookie,tag,hash,total_time_s,kernel,history,input,expire) VALUES($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12)`, [ @@ -242,7 +243,7 @@ async function saveResponse({ account_id, project_id, path, - analytics_cookie, + anonymous_id, tag, hash, total_time_s, diff --git a/src/packages/server/jupyter/recent-usage.ts b/src/packages/server/jupyter/recent-usage.ts index 3e15e9c02b..e2617e0b1e 100644 --- a/src/packages/server/jupyter/recent-usage.ts +++ b/src/packages/server/jupyter/recent-usage.ts @@ -8,22 +8,22 @@ import getPool from "@cocalc/database/pool"; type QueryArgs = { period: string; account_id?: string; - analytics_cookie?: string; + anonymous_id?: string; cache?: "short" | "medium" | "long"; }; export 
default async function recentUsage({ period, account_id, - analytics_cookie, + anonymous_id, cache, }: QueryArgs): Promise { let queryArgs; if (account_id) { queryArgs = buildAccountIdQuery(period, account_id); - } else if (analytics_cookie) { - queryArgs = buildAnalyticsCookieQuery(period, analytics_cookie); + } else if (anonymous_id) { + queryArgs = buildAnalyticsCookieQuery(period, anonymous_id); } else { queryArgs = buildOverallUsageQuery(period); } @@ -34,7 +34,7 @@ export default async function recentUsage({ async function getUsageForQuery( query: string, args: any[], - cache?: QueryArgs["cache"] + cache?: QueryArgs["cache"], ): Promise { const pool = getPool(cache); const { rows } = await pool.query(query, args); @@ -43,7 +43,7 @@ async function getUsageForQuery( function buildAccountIdQuery( period: string, - account_id: string + account_id: string, ): [string, any[]] { const query = `SELECT SUM(total_time_s) AS usage FROM jupyter_api_log WHERE created >= NOW() - INTERVAL '${period}' AND account_id=$1 AND project_id IS NULL AND path IS NULL`; const args = [account_id]; @@ -52,10 +52,11 @@ function buildAccountIdQuery( function buildAnalyticsCookieQuery( period: string, - analytics_cookie: string + anonymous_id: string, ): [string, any[]] { + // query uses analytics_cookie before generalizing to an anonymous ID const query = `SELECT SUM(total_time_s) AS usage FROM jupyter_api_log WHERE created >= NOW() - INTERVAL '${period}' AND analytics_cookie=$1 AND project_id IS NULL AND path IS NULL`; - const args = [analytics_cookie]; + const args = [anonymous_id]; return [query, args]; } diff --git a/src/packages/server/llm/abuse.ts b/src/packages/server/llm/abuse.ts index e75192547b..3c316c44f1 100644 --- a/src/packages/server/llm/abuse.ts +++ b/src/packages/server/llm/abuse.ts @@ -29,10 +29,10 @@ import { model2service, } from "@cocalc/util/db-schema/llm-utils"; import { KUCALC_COCALC_COM } from "@cocalc/util/db-schema/site-defaults"; -import { isValidUUID } from 
"@cocalc/util/misc"; +import { isValidAnonymousID, isValidUUID } from "@cocalc/util/misc"; import isValidAccount from "../accounts/is-valid-account"; -// These are tokens over a given period of time – summed by account/analytics_cookie or global. +// These are tokens over a given period of time – summed by account/anonymous_id or global. const QUOTAS = { noAccount: process_env_int("COCALC_LLM_QUOTA_NO_ACCOUNT", 0), account: process_env_int("COCALC_LLM_QUOTA_ACCOUNT", 10 ** 5), @@ -63,11 +63,11 @@ const prom_rejected = newCounter( // Throws an exception if the request should not be allowed. export async function checkForAbuse({ account_id, - analytics_cookie, + anonymous_id, model, }: { account_id?: string; - analytics_cookie?: string; + anonymous_id?: string; model: LanguageModel; }): Promise { if (!account_id) { @@ -75,9 +75,9 @@ export async function checkForAbuse({ // https://github.com/xtekky/gpt4free/tree/main/gpt4free/cocalc throw Error("You must create an account."); } - if (!isValidUUID(account_id) && !isValidUUID(analytics_cookie)) { + if (!isValidUUID(account_id) && !isValidAnonymousID(anonymous_id)) { // at least some amount of tracking. - throw Error("at least one of account_id or analytics_cookie must be set"); + throw Error("at least one of account_id or anonymous_id must be set"); } if (!isLanguageModel(model)) { @@ -107,7 +107,7 @@ export async function checkForAbuse({ cache: "short", period: "1 hour", account_id, - analytics_cookie, + anonymous_id, }); // this fluctuates for each account, we'll tally up how often users end up in certain usage buckets @@ -133,7 +133,7 @@ export async function checkForAbuse({ ); } - // Prevent more sophisticated abuse, e.g., changing analytics_cookie or account frequently, + // Prevent more sophisticated abuse, e.g., changing anonymous_id or account frequently, // or just a general huge surge in usage. 
const overallUsage = await recentUsage({ cache: "long", period: "1 hour" }); prom_quota_global @@ -152,12 +152,12 @@ export async function checkForAbuse({ async function recentUsage({ period, account_id, - analytics_cookie, + anonymous_id, cache, }: { period: string; account_id?: string; - analytics_cookie?: string; + anonymous_id?: string; // some caching so if user is hitting us a lot, we don't hit the database to // decide they are abusive -- at the same time, short enough that we notice. // Recommendation: "short" @@ -171,9 +171,10 @@ async function recentUsage({ } query = `SELECT SUM(total_tokens) AS usage FROM openai_chatgpt_log WHERE account_id=$1 AND time >= NOW() - INTERVAL '${period}'`; args = [account_id]; - } else if (analytics_cookie) { + } else if (anonymous_id) { + // still setting analytics_cookie in the db query, because this was before generalizing to an anonymous_id string query = `SELECT SUM(total_tokens) AS usage FROM openai_chatgpt_log WHERE analytics_cookie=$1 AND time >= NOW() - INTERVAL '${period}'`; - args = [analytics_cookie]; + args = [anonymous_id]; } else { query = `SELECT SUM(total_tokens) AS usage FROM openai_chatgpt_log WHERE time >= NOW() - INTERVAL '${period}'`; args = []; diff --git a/src/packages/server/llm/index.ts b/src/packages/server/llm/index.ts index 32fdeb574d..dcdc7337e1 100644 --- a/src/packages/server/llm/index.ts +++ b/src/packages/server/llm/index.ts @@ -145,7 +145,7 @@ async function evaluateImpl({ account_id, project_id, path, - analytics_cookie, + anonymous_id, history, model = DEFAULT_MODEL, tag, @@ -158,7 +158,7 @@ async function evaluateImpl({ // history, // system, // account_id, - // analytics_cookie, + // anonymous_id, // project_id, // path, // model, @@ -168,7 +168,7 @@ async function evaluateImpl({ // }); const start = Date.now(); - await checkForAbuse({ account_id, analytics_cookie, model }); + await checkForAbuse({ account_id, anonymous_id, model }); stream = wrapStream(stream); @@ -281,7 +281,7 @@ 
async function evaluateImpl({ output, history, account_id, - analytics_cookie, + anonymous_id, project_id, path, total_tokens, diff --git a/src/packages/server/llm/save-response.ts b/src/packages/server/llm/save-response.ts index 7abb6be462..806fbea597 100644 --- a/src/packages/server/llm/save-response.ts +++ b/src/packages/server/llm/save-response.ts @@ -16,7 +16,7 @@ type SaveResponseProps = Omit; // Also, we could dedup identical inputs (?). export async function saveResponse({ account_id, - analytics_cookie, + anonymous_id, history, input, model, @@ -32,6 +32,7 @@ export async function saveResponse({ const expire: LLMLogEntry["expire"] = await getExpiration(account_id); const pool = getPool(); try { + // still setting analytics_cookie in the db query, because this was before generalizing to an anonymous_id string await pool.query( "INSERT INTO openai_chatgpt_log(time,input,system,output,history,account_id,analytics_cookie,project_id,path,total_tokens,prompt_tokens,total_time_s,expire,model,tag) VALUES(NOW(),$1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14)", [ @@ -40,7 +41,7 @@ export async function saveResponse({ output, history, account_id, - analytics_cookie, + anonymous_id, project_id, path, total_tokens, @@ -58,7 +59,7 @@ export async function saveResponse({ async function getExpiration(account_id: string | undefined) { // NOTE about expire: If the admin setting for "PII Retention" is set *and* - // the usage is only identified by their analytics_cookie, then + // the usage is only identified by their anonymous_id, then // we automatically delete the log of chatgpt usage at the expiration time. // If the account_id *is* set, users can do the following: // 1. 
Ability to delete any of their past chatgpt usage diff --git a/src/packages/util/consts/index.ts b/src/packages/util/consts/index.ts index 5d711cdf6a..13e2a92807 100644 --- a/src/packages/util/consts/index.ts +++ b/src/packages/util/consts/index.ts @@ -12,3 +12,5 @@ export { NOT_SIGNED_IN, versionCookieName } from "./auth"; export { DUMMY_SECRET } from "./project"; export { SERVER_SETTINGS_ENV_PREFIX } from "./server_settings"; + +export { ANALYTICS_COOKIE_NAME } from "./tracking"; diff --git a/src/packages/util/consts/tracking.ts b/src/packages/util/consts/tracking.ts new file mode 100644 index 0000000000..ab67ecb8a3 --- /dev/null +++ b/src/packages/util/consts/tracking.ts @@ -0,0 +1,7 @@ +/* + * This file is part of CoCalc: Copyright © 2025 Sagemath, Inc. + * License: MS-RSL – see LICENSE.md for details + */ + +// Cookie name for analytics tracking +export const ANALYTICS_COOKIE_NAME = "CC_ANA"; diff --git a/src/packages/util/db-schema/llm.ts b/src/packages/util/db-schema/llm.ts index ee72184661..3d2b81f399 100644 --- a/src/packages/util/db-schema/llm.ts +++ b/src/packages/util/db-schema/llm.ts @@ -11,7 +11,7 @@ import { Table } from "./types"; export interface LLMLogEntry { id: number; account_id?: string; - analytics_cookie?: string; // at least one of analytics_cookie or account_id will be set + anonymous_id?: string; // this is saved in analytics_cookie – at least one of anonymous_id or account_id will be set expire?: Date; history?: History; input: string; diff --git a/src/packages/util/db-schema/site-settings-extras.ts b/src/packages/util/db-schema/site-settings-extras.ts index 09db8b45ba..4986b9a08e 100644 --- a/src/packages/util/db-schema/site-settings-extras.ts +++ b/src/packages/util/db-schema/site-settings-extras.ts @@ -184,6 +184,7 @@ function custom_llm_display(value: string): string { export type SiteSettingsExtrasKeys = | "pii_retention" + | "analytics_cookie" | "conat_heading" | "conat_password" | "stripe_heading" @@ -405,6 +406,13 @@ export const 
EXTRAS: SettingsExtras = { to_val: pii_retention_parse, to_display: pii_retention_display, }, + analytics_cookie: { + name: "Analytics Cookie", + desc: "Tag browser sessions visiting a website via an analytics.js script with a cookie", + default: "no", + valid: only_booleans, + to_val: to_bool, + }, stripe_heading: { // this is consmetic, otherwise it looks weird. name: "Stripe Keys", diff --git a/src/packages/util/get-client-ip-address.test.ts b/src/packages/util/get-client-ip-address.test.ts new file mode 100644 index 0000000000..6ba56eceb4 --- /dev/null +++ b/src/packages/util/get-client-ip-address.test.ts @@ -0,0 +1,292 @@ +import { getClientIpAddress } from "./get-client-ip-address"; + +describe("getClientIpAddress()", () => { + const createRequest = (headers: Record) => ({ headers }); + + describe("Standard Headers Supported by request-ip", () => { + it("should handle CF-Connecting-IP (highest priority)", () => { + const req = createRequest({ + "x-client-ip": "203.0.113.1", + "x-forwarded-for": "192.168.1.1", + "cf-connecting-ip": "198.51.100.1", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("198.51.100.1"); + }); + + it("should handle X-Forwarded-For with multiple IPs", () => { + const req = createRequest({ + "x-forwarded-for": "203.0.113.1, 192.168.1.1, 10.0.0.1", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("203.0.113.1"); + }); + + it("should handle CF-Connecting-IP from Cloudflare", () => { + const req = createRequest({ + "cf-connecting-ip": "203.0.113.1", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("203.0.113.1"); + }); + + it("should handle Fastly-Client-Ip from Fastly", () => { + const req = createRequest({ + "fastly-client-ip": "203.0.113.1", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("203.0.113.1"); + }); + + it("should handle True-Client-Ip from Akamai/Cloudflare", () => { + const req = createRequest({ + "true-client-ip": "203.0.113.1", + 
}); + + const result = getClientIpAddress(req); + expect(result).toBe("203.0.113.1"); + }); + + it("should handle X-Real-IP from nginx", () => { + const req = createRequest({ + "x-real-ip": "203.0.113.1", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("203.0.113.1"); + }); + + it("should handle X-Cluster-Client-IP from Rackspace", () => { + const req = createRequest({ + "x-cluster-client-ip": "203.0.113.1", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("203.0.113.1"); + }); + + it("should handle appengine-user-ip from Google App Engine", () => { + const req = createRequest({ + "appengine-user-ip": "203.0.113.1", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("203.0.113.1"); + }); + }); + + describe("Header Priority Order", () => { + it("should prioritize X-Client-IP over X-Forwarded-For", () => { + const req = createRequest({ + "x-client-ip": "203.0.113.1", + "x-forwarded-for": "192.168.1.1", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("203.0.113.1"); + }); + + it("should prioritize CF-Connecting-IP over X-Forwarded-For", () => { + const req = createRequest({ + "x-forwarded-for": "203.0.113.1", + "cf-connecting-ip": "192.168.1.1", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("192.168.1.1"); + }); + + it("should prioritize CF-Connecting-IP over Fastly-Client-Ip", () => { + const req = createRequest({ + "cf-connecting-ip": "203.0.113.1", + "fastly-client-ip": "192.168.1.1", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("203.0.113.1"); + }); + + it("should prioritize Fastly-Client-Ip over True-Client-Ip", () => { + const req = createRequest({ + "fastly-client-ip": "203.0.113.1", + "true-client-ip": "192.168.1.1", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("203.0.113.1"); + }); + + it("should prioritize True-Client-Ip over X-Real-IP", () => { + const req = createRequest({ + 
"true-client-ip": "203.0.113.1", + "x-real-ip": "192.168.1.1", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("203.0.113.1"); + }); + }); + + describe("Case Sensitivity (Headers are lowercase in Node.js)", () => { + it("should handle uppercase headers (converted to lowercase by Node.js)", () => { + const req = createRequest({ + "X-CLIENT-IP": "203.0.113.1", // This would be lowercase in real Node.js + }); + + const result = getClientIpAddress(req); + expect(result).toBe("203.0.113.1"); + }); + + it("should handle mixed case headers", () => { + const req = createRequest({ + "X-Forwarded-For": "203.0.113.1, 192.168.1.1", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("203.0.113.1"); + }); + }); + + describe("Forwarded Header Fallback (when request-ip fails)", () => { + it("should parse simple Forwarded header", () => { + const req = createRequest({ + forwarded: "for=192.0.2.60", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("192.0.2.60"); + }); + + it("should parse quoted Forwarded header", () => { + const req = createRequest({ + forwarded: 'for="192.0.2.60"', + }); + + const result = getClientIpAddress(req); + expect(result).toBe("192.0.2.60"); + }); + + it("should parse Forwarded header with IPv6 brackets", () => { + const req = createRequest({ + forwarded: 'for="[2001:db8:cafe::17]"', + }); + + const result = getClientIpAddress(req); + expect(result).toBe("2001:db8:cafe::17"); + }); + + it("should handle port stripping for IPv4", () => { + const req = createRequest({ + forwarded: "for=192.0.2.60:4711", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("192.0.2.60"); + }); + + it("should handle multiple parameters in Forwarded header", () => { + const req = createRequest({ + forwarded: "for=192.0.2.60;proto=http;by=203.0.113.43", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("192.0.2.60"); + }); + + it("should handle case-insensitive FOR 
parameter", () => { + const req = createRequest({ + forwarded: "For=192.0.2.60", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("192.0.2.60"); + }); + + it("should skip invalid entries and use first valid IP", () => { + const req = createRequest({ + forwarded: "for=_gazonk, for=192.0.2.60", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("192.0.2.60"); + }); + + it("should return undefined when no valid for= parameter exists", () => { + const req = createRequest({ + forwarded: "proto=http;by=203.0.113.43", + }); + + const result = getClientIpAddress(req); + expect(result).toBeUndefined(); + }); + }); + + describe("IPv6 Support", () => { + it("should handle IPv6 addresses in X-Forwarded-For", () => { + const req = createRequest({ + "x-forwarded-for": "2001:db8:85a3:8d3:1319:8a2e:370:7348", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("2001:db8:85a3:8d3:1319:8a2e:370:7348"); + }); + + it("should handle compressed IPv6 addresses", () => { + const req = createRequest({ + "x-forwarded-for": "2001:db8::1", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("2001:db8::1"); + }); + + it("should handle IPv6 loopback", () => { + const req = createRequest({ + "x-forwarded-for": "::1", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("::1"); + }); + }); + + describe("Edge Cases", () => { + it("should return undefined for empty headers", () => { + const req = createRequest({}); + + const result = getClientIpAddress(req); + expect(result).toBeUndefined(); + }); + + it("should return undefined for invalid IP addresses", () => { + const req = createRequest({ + "x-forwarded-for": "not.an.ip.address", + }); + + const result = getClientIpAddress(req); + expect(result).toBeUndefined(); + }); + + it("should handle localhost addresses", () => { + const req = createRequest({ + "x-forwarded-for": "127.0.0.1", + }); + + const result = getClientIpAddress(req); + 
expect(result).toBe("127.0.0.1"); + }); + + it("should handle private IP addresses", () => { + const req = createRequest({ + "x-forwarded-for": "192.168.1.1", + }); + + const result = getClientIpAddress(req); + expect(result).toBe("192.168.1.1"); + }); + }); +}); diff --git a/src/packages/util/get-client-ip-address.ts b/src/packages/util/get-client-ip-address.ts new file mode 100644 index 0000000000..eb529c3545 --- /dev/null +++ b/src/packages/util/get-client-ip-address.ts @@ -0,0 +1,106 @@ +import { isIP } from "net"; +import { getClientIp } from "request-ip"; + +export function getClientIpAddress(req: { + headers: Record; +}): string | undefined { + // Try manual extraction for headers not supported by request-ip + const headersToCheck = [ + "cf-connecting-ip", // prioritize cloudflare + "x-client-ip", + "x-forwarded-for", + "fastly-client-ip", + "true-client-ip", + "x-real-ip", + "x-cluster-client-ip", + "appengine-user-ip", + ]; + + // Check each header (case-insensitive) + for (const headerName of headersToCheck) { + const headerValue = getHeaderValue(req.headers, headerName); + if (headerValue) { + // Handle comma-separated values (like X-Forwarded-For) + const ips = headerValue.split(",").map((ip) => ip.trim()); + for (const ip of ips) { + if (isIP(ip)) { + return ip; + } + } + } + } + + // Try request-ip package as fallback + const ip = getClientIp(req); + if (ip && isIP(ip)) { + return ip; + } + + // Fallback "Forwarded" header parsing, because this is not merged: + // https://github.com/pbojinov/request-ip/pull/71 + const forwardedHeader = getHeaderValue(req.headers, "forwarded"); + if (forwardedHeader) { + // Split by comma for multiple forwarded entries + const forwardedEntries = forwardedHeader.split(","); + + for (const entry of forwardedEntries) { + // Split by semicolon for parameters + const params = entry.split(";"); + + for (const param of params) { + const trimmed = param.trim(); + if (trimmed.toLowerCase().startsWith("for=")) { + let ipVal = 
trimmed.substring(4).trim(); + + // Remove quotes if present + if (ipVal.startsWith('"') && ipVal.endsWith('"')) { + ipVal = ipVal.slice(1, -1); + } + + // Handle IPv6 brackets + if (ipVal.startsWith("[") && ipVal.endsWith("]")) { + ipVal = ipVal.slice(1, -1); + } + + // Handle port stripping for IPv4 addresses + if (ipVal.includes(":")) { + const parts = ipVal.split(":"); + // Only strip port if it looks like IPv4:port (not IPv6) + if (parts.length === 2 && isIP(parts[0])) { + ipVal = parts[0]; + } + } + + if (isIP(ipVal)) { + return ipVal; + } + } + } + } + } + + return undefined; +} + +// Helper function to get header value case-insensitively +function getHeaderValue( + headers: Record, + name: string, +): string | undefined { + const lowerName = name.toLowerCase(); + + // Check exact match first + const exactMatch = headers[lowerName]; + if (exactMatch) { + return Array.isArray(exactMatch) ? exactMatch[0] : exactMatch; + } + + // Check case-insensitive match + for (const [key, value] of Object.entries(headers)) { + if (key.toLowerCase() === lowerName && value) { + return Array.isArray(value) ? 
value[0] : value; + } + } + + return undefined; +} diff --git a/src/packages/util/misc.test.ts b/src/packages/util/misc.test.ts index f162a5ece5..40213f7173 100644 --- a/src/packages/util/misc.test.ts +++ b/src/packages/util/misc.test.ts @@ -343,3 +343,48 @@ describe("suggest_duplicate_filename", () => { expect(dup("asdf-")).toBe("asdf--1"); }); }); + +describe("isValidAnonymousID", () => { + const isValid = misc.isValidAnonymousID; + + it("should accept valid IPv4 addresses", () => { + expect(isValid("192.168.1.1")).toBe(true); + expect(isValid("10.23.66.8")).toBe(true); + }); + + it("should accept valid IPv6 addresses", () => { + expect(isValid("::1")).toBe(true); + expect(isValid("2001:db8::1")).toBe(true); + expect(isValid("fe80::1")).toBe(true); + expect(isValid("2001:0db8:85a3:0000:0000:8a2e:0370:7334")).toBe(true); + }); + + it("should accept valid UUIDs", () => { + expect(isValid("123e4567-e89b-12d3-a456-426614174000")).toBe(true); + }); + + it("should accept strings with minimum length", () => { + expect(isValid("abc")).toBe(true); + }); + + it("should reject empty strings", () => { + expect(isValid("")).toBe(false); + }); + + it("should reject strings shorter than 3 characters", () => { + expect(isValid("ab")).toBe(false); + }); + + it("should reject null and undefined", () => { + expect(isValid(null)).toBe(false); + expect(isValid(undefined)).toBe(false); + }); + + it("should reject non-string types", () => { + expect(isValid(123)).toBe(false); + expect(isValid(true)).toBe(false); + expect(isValid({})).toBe(false); + expect(isValid([])).toBe(false); + expect(isValid(new Date())).toBe(false); + }); +}); diff --git a/src/packages/util/misc.ts b/src/packages/util/misc.ts index 853b5be89e..57ecc0b722 100644 --- a/src/packages/util/misc.ts +++ b/src/packages/util/misc.ts @@ -305,6 +305,11 @@ export function assert_valid_account_id(uuid?: any): void { } export const isValidUUID = is_valid_uuid_string; +// this should work for IP addresses, also short IPv6, and 
any UUIDs +export function isValidAnonymousID(id: unknown) { + return typeof id === "string" && id.length >= 3; +} + export function assertValidAccountID(account_id?: any) { if (!isValidUUID(account_id)) { throw Error("account_id is invalid"); @@ -501,7 +506,7 @@ export function trunc_left( sArg: T, max_length = 1024, ellipsis = ELLIPSIS, -): T | string { +): T | string { if (sArg == null) { return sArg; } @@ -2301,9 +2306,6 @@ export function sanitize_html_attributes($, node): void { }); } -// cocalc analytics cookie name -export const analytics_cookie_name = "CC_ANA"; - // convert a jupyter kernel language (i.e. "python" or "r", usually short and lowercase) // to a canonical name. export function jupyter_language_to_name(lang: string): string { diff --git a/src/packages/util/package.json b/src/packages/util/package.json index e9656ac1e3..26ab972760 100644 --- a/src/packages/util/package.json +++ b/src/packages/util/package.json @@ -58,6 +58,7 @@ "react": "^19.1.0", "react-intl": "^7.1.11", "redux": "^4.2.1", + "request-ip": "^3.3.0", "reselect": "^4.1.8", "sha1": "^1.1.1", "underscore": "^1.12.1", diff --git a/src/packages/util/types/llm.ts b/src/packages/util/types/llm.ts index d7e4152ea2..3f48f89363 100644 --- a/src/packages/util/types/llm.ts +++ b/src/packages/util/types/llm.ts @@ -18,7 +18,7 @@ export interface ChatOptionsApi { account_id?: string; project_id?: string; path?: string; - analytics_cookie?: string; + anonymous_id?: string; history?: History; model?: LanguageModel; // default is defined by server setting default_llm tag?: string;