diff --git a/.playground/nuxt.config.ts b/.playground/nuxt.config.ts
index b4e412b2..7586e4b3 100644
--- a/.playground/nuxt.config.ts
+++ b/.playground/nuxt.config.ts
@@ -14,6 +14,13 @@ export default defineNuxtConfig({
*/
defineNuxtModule({
setup(_, nuxt) {
+ nuxt.hooks.hook('robots:config', (config) => {
+ const catchAll = config.groups.find(g => g.userAgent.includes('*'))
+ if (catchAll) {
+ catchAll.disallow.push('/__link-checker__/')
+ }
+ console.log({ catchAll, groups: config.groups })
+ })
if (!nuxt.options.dev)
return
diff --git a/.playground/pages/index.vue b/.playground/pages/index.vue
index f44fe244..959c1fd5 100644
--- a/.playground/pages/index.vue
+++ b/.playground/pages/index.vue
@@ -1,9 +1,18 @@
+
+
Secret page - not crawlable
+
+ Is Bot: {{ bot }}
+
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 00000000..a6742e48
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,61 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Development Commands
+
+- **Build**: `pnpm build` - Builds the module using nuxt-module-build and generates client
+- **Development**: `pnpm dev` - Runs playground at `.playground` directory
+- **Development Preparation**: `pnpm dev:prepare` - Prepares development environment with stub build
+- **Test**: `pnpm test` - Runs vitest test suite
+- **Lint**: `pnpm lint` - Runs ESLint with auto-fix using @antfu/eslint-config
+- **Type Check**: `pnpm typecheck` - Runs TypeScript compiler for type checking
+- **Client Development**: `pnpm client:dev` - Runs devtools UI client on port 3300
+- **Release**: `pnpm release` - Builds, bumps version, and publishes
+
+## Architecture Overview
+
+This is a Nuxt module (`@nuxtjs/robots`) that provides robots.txt generation and robot meta tag functionality for Nuxt applications.
+
+### Core Module Structure
+
+- **`src/module.ts`**: Main module entry point with module options and setup logic
+- **`src/runtime/`**: Runtime code that gets injected into user applications
+ - **`app/`**: Client-side runtime (composables, plugins)
+ - **`server/`**: Server-side runtime (middleware, routes, composables)
+- **`src/kit.ts`**: Utilities for build-time module functionality
+- **`src/util.ts`**: Shared utilities exported to end users
+
+### Key Runtime Components
+
+- **Server Routes**:
+ - `/robots.txt` route handler in `src/runtime/server/routes/robots-txt.ts`
+ - Debug routes under `/__robots__/` for development
+- **Server Composables**: `getSiteRobotConfig()` and `getPathRobotConfig()` for runtime robot configuration
+- **Client Composables**: `useRobotsRule()` for accessing robot rules in Vue components
+- **Meta Plugin**: Automatically injects robot meta tags and X-Robots-Tag headers
+
+### Build System
+
+- Uses `@nuxt/module-builder` with unbuild configuration in `build.config.ts`
+- Exports multiple entry points: main module, `/util`, and `/content`
+- Supports both ESM and CommonJS via rollup configuration
+
+### Test Structure
+
+- **Integration Tests**: Test fixtures in `test/fixtures/` with full Nuxt apps
+- **Unit Tests**: Focused tests in `test/unit/` for specific functionality
+- Uses `@nuxt/test-utils` for testing Nuxt applications
+- Test environment automatically set to production mode
+
+### Development Workflow
+
+The module supports a playground at `.playground` for local development and manual testing. The client UI (devtools integration) is developed separately in the `client/` directory.
+
+### I18n Integration
+
+The module has special handling for i18n scenarios, with logic in `src/i18n.ts` for splitting paths and handling localized routes.
+
+### Content Integration
+
+Provides integration with Nuxt Content module via `src/content.ts` for content-based robot configurations.
diff --git a/docs/content/3.api/1.config.md b/docs/content/3.api/0.config.md
similarity index 100%
rename from docs/content/3.api/1.config.md
rename to docs/content/3.api/0.config.md
diff --git a/docs/content/3.api/1.nuxt-hooks.md b/docs/content/3.api/robots-config.md
similarity index 89%
rename from docs/content/3.api/1.nuxt-hooks.md
rename to docs/content/3.api/robots-config.md
index 33313932..59c87195 100644
--- a/docs/content/3.api/1.nuxt-hooks.md
+++ b/docs/content/3.api/robots-config.md
@@ -1,10 +1,8 @@
---
-title: Nuxt Hooks
+title: "Hook: robots:config"
description: Learn how to use Nuxt hooks to modify the robots config.
---
-## `'robots:config'`{lang="ts"}
-
**Type:** `(config: ResolvedModuleOptions) => void | Promise`{lang="ts"}
This hook allows you to modify the robots config before it is used to generate the robots.txt and meta tags.
diff --git a/libs/is-bot/.gitignore b/libs/is-bot/.gitignore
new file mode 100644
index 00000000..99afced5
--- /dev/null
+++ b/libs/is-bot/.gitignore
@@ -0,0 +1,8 @@
+node_modules/
+dist/
+*.log
+.DS_Store
+coverage/
+.nyc_output/
+*.tgz
+*.tar.gz
\ No newline at end of file
diff --git a/libs/is-bot/README.md b/libs/is-bot/README.md
new file mode 100644
index 00000000..a8d2a32a
--- /dev/null
+++ b/libs/is-bot/README.md
@@ -0,0 +1,162 @@
+# Bot Detection Library
+
+A framework-agnostic bot detection library with advanced behavioral analysis capabilities.
+
+## Features
+
+- 🤖 **Advanced Bot Detection**: Multi-layered analysis including user agents, behavioral patterns, and timing analysis
+- 🔧 **Framework Agnostic**: Works with any web framework through driver pattern
+- 🚀 **H3/Nuxt Ready**: Built-in support for H3 events and Nuxt applications
+- 📊 **Behavioral Analysis**: Modular system with simple, intermediate, and advanced detection behaviors
+- 💾 **Flexible Storage**: Supports multiple storage backends through adapter pattern
+- 🎯 **High Performance**: Optimized with batch operations and intelligent caching
+- 🛡️ **Security Focused**: IP allowlists/blocklists, rate limiting, and threat detection
+
+## Installation
+
+```bash
+npm install @nuxtjs/robots-bot-detection
+```
+
+## Quick Start
+
+### Basic Usage
+
+```typescript
+import { BotDetectionEngine, MemoryAdapter, H3SessionIdentifier } from '@nuxtjs/robots-bot-detection'
+
+// Create storage adapter
+const storage = new MemoryAdapter()
+
+// Create session identifier
+const sessionIdentifier = new H3SessionIdentifier()
+
+// Create engine
+const engine = new BotDetectionEngine({
+ storage,
+ sessionIdentifier,
+ config: {
+ thresholds: {
+ likelyBot: 70,
+ definitelyBot: 90
+ }
+ }
+})
+
+// Analyze a request
+const request = {
+ path: '/api/data',
+ method: 'GET',
+ headers: {
+ 'user-agent': 'Mozilla/5.0 ...'
+ },
+ ip: '192.168.1.1',
+ timestamp: Date.now()
+}
+
+const result = await engine.analyze(request)
+console.log(`Bot score: ${result.score}`)
+console.log(`Is bot: ${result.isBot}`)
+```
+
+### H3/Nuxt Integration
+
+```typescript
+import { BotDetectionEngine, UnstorageBehaviorAdapter, H3SessionIdentifier, h3ToBotDetectionRequest } from '@nuxtjs/robots-bot-detection'
+import { createStorage } from 'unstorage'
+import redisDriver from 'unstorage/drivers/redis'
+
+const storage = createStorage({ driver: redisDriver({ url: 'redis://localhost:6379' }) })
+const adapter = new UnstorageBehaviorAdapter(storage)
+const sessionIdentifier = new H3SessionIdentifier('your-session-secret')
+
+const engine = new BotDetectionEngine({
+  storage: adapter,
+  sessionIdentifier
+})
+
+// In your H3 handler
+export default defineEventHandler(async (event) => {
+  const request = h3ToBotDetectionRequest(event)
+  const result = await engine.analyze(request, event)
+
+ if (result.isBot) {
+ throw createError({
+ statusCode: 429,
+ statusMessage: 'Too Many Requests'
+ })
+ }
+
+ // Continue with normal processing
+})
+```
+
+## API Reference
+
+### BotDetectionEngine
+
+The main engine class for bot detection.
+
+#### Constructor Options
+
+```typescript
+interface BotDetectionEngineOptions {
+ storage: BehaviorStorage
+ sessionIdentifier: SessionIdentifier
+ responseStatusProvider?: ResponseStatusProvider
+ config?: BotDetectionConfig
+}
+```
+
+#### Methods
+
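+- `analyze(request: BotDetectionRequest, event?: H3Event)`: returns a promise that resolves to the detection result (including `score` and `isBot`)
+- `updateConfig(config: Partial<BotDetectionConfig>): void`
+- `cleanup(): Promise<void>`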
+
+### Storage Adapters
+
+#### MemoryAdapter
+In-memory storage for development and testing.
+
+#### UnstorageBehaviorAdapter
+Production-ready storage adapter using unstorage.
+
+### Behavior Configuration
+
+Configure which detection behaviors to enable:
+
+```typescript
+const config = {
+ behaviors: {
+ simple: {
+ pathAnalysis: { enabled: true, weight: 1.0 },
+ basicTiming: { enabled: true, weight: 0.8 },
+ basicRateLimit: { enabled: true, weight: 1.2 }
+ },
+ intermediate: {
+ burstDetection: { enabled: true, weight: 1.0 },
+ headerConsistency: { enabled: true, weight: 0.9 }
+ },
+ advanced: {
+ advancedTiming: { enabled: false, weight: 1.5 },
+ browserFingerprint: { enabled: false, weight: 1.3 }
+ }
+ }
+}
+```
+
+## Testing
+
+```bash
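+# Run tests (vitest starts in watch mode outside CI)
+npm test
+
+# Run tests once
+npm run test:run
+
+# Run tests with coverage
+npm run test:coverage
+
+# Rebuild the library on change (tsup --watch)
+npm run dev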
+```
+
+## License
+
+MIT License - see LICENSE file for details.
\ No newline at end of file
diff --git a/libs/is-bot/package.json b/libs/is-bot/package.json
new file mode 100644
index 00000000..237d69e5
--- /dev/null
+++ b/libs/is-bot/package.json
@@ -0,0 +1,70 @@
+{
+ "name": "@nuxtjs/robots-bot-detection",
+ "version": "1.0.0",
+ "description": "Framework-agnostic bot detection library",
+ "type": "module",
+ "main": "./dist/index.js",
+ "module": "./dist/index.js",
+ "types": "./dist/index.d.ts",
+ "exports": {
+ ".": {
+ "types": "./dist/index.d.ts",
+ "import": "./dist/index.js",
+ "require": "./dist/index.cjs"
+ },
+ "./h3": {
+ "types": "./dist/drivers/h3.d.ts",
+ "import": "./dist/drivers/h3.js",
+ "require": "./dist/drivers/h3.cjs"
+ },
+ "./behaviors": {
+ "types": "./dist/behaviors/index.d.ts",
+ "import": "./dist/behaviors/index.js",
+ "require": "./dist/behaviors/index.cjs"
+ }
+ },
+ "files": [
+ "dist",
+ "src"
+ ],
+ "scripts": {
+ "build": "tsup",
+ "dev": "tsup --watch",
+ "test": "vitest",
+ "test:run": "vitest run",
+ "test:coverage": "vitest run --coverage",
+ "typecheck": "tsc --noEmit",
+ "lint": "eslint src test --ext .ts,.js",
+ "lint:fix": "eslint src test --ext .ts,.js --fix"
+ },
+ "keywords": [
+ "bot-detection",
+ "security",
+ "web-scraping",
+ "rate-limiting",
+ "h3",
+ "nuxt",
+ "nitro"
+ ],
+ "author": "Nuxt Team",
+ "license": "MIT",
+ "dependencies": {
+ "unstorage": "^1.16.0"
+ },
+ "peerDependencies": {
+ "h3": "^1.0.0"
+ },
+ "devDependencies": {
+ "@types/node": "^20.19.4",
+ "eslint": "^9.30.1",
+ "h3": "^1.15.3",
+ "tsup": "^8.5.0",
+ "typescript": "^5.8.3",
+ "vitest": "^3.2.4"
+ },
+ "repository": {
+ "type": "git",
+ "url": "https://github.com/nuxt-modules/robots.git",
+ "directory": "libs/is-bot"
+ }
+}
diff --git a/libs/is-bot/src/adapters/behavior-storage.ts b/libs/is-bot/src/adapters/behavior-storage.ts
new file mode 100644
index 00000000..38c73817
--- /dev/null
+++ b/libs/is-bot/src/adapters/behavior-storage.ts
@@ -0,0 +1,113 @@
+// Adapter to bridge between framework-agnostic storage and behavior types
+import type { Storage } from 'unstorage'
+import type { SessionData, IPData } from '../behavior'
+import type { SiteProfile } from '../types'
+
+/**
+ * Persistence contract the behavior analysis relies on for session, per-IP and
+ * site-profile records. Getters resolve to `null` when no usable record exists.
+ */
+export interface BehaviorStorage {
+  /** Load the record stored for `sessionId`. */
+  getSession(sessionId: string): Promise<SessionData | null>
+  /** Store (or overwrite) the record for `sessionId`. */
+  setSession(sessionId: string, data: SessionData): Promise<void>
+  /** Load the record stored for the client address `ip`. */
+  getIP(ip: string): Promise<IPData | null>
+  /** Store (or overwrite) the record for the client address `ip`. */
+  setIP(ip: string, data: IPData): Promise<void>
+  /** Load the single site-wide profile. */
+  getSiteProfile(): Promise<SiteProfile | null>
+  /** Store (or overwrite) the site-wide profile. */
+  setSiteProfile(profile: SiteProfile): Promise<void>
+  /** Optional housekeeping hook for dropping expired records. */
+  cleanup?(): Promise<void>
+}
+
+/**
+ * `BehaviorStorage` implementation backed by an unstorage `Storage` instance.
+ *
+ * Keys are laid out as `<prefix>:session:<sessionId>`, `<prefix>:ip:<sanitized ip>` and
+ * `<prefix>:site-profile`. Session and IP records are expired on read and in `cleanup()`
+ * by comparing their `lastUpdated` timestamp with the configured TTL.
+ */
+export class UnstorageBehaviorAdapter implements BehaviorStorage {
+  private storage: Storage
+  private prefix: string
+  private sessionTTL: number
+  private ipTTL: number
+  // Stored from the options; no method of this class reads it.
+  private siteProfileTTL: number
+
+  /**
+   * @param storage unstorage instance that holds the records
+   * @param options optional key prefix and TTLs in milliseconds; falsy values
+   *   (including 0 and '') fall back to the defaults
+   */
+  constructor(storage: Storage, options: {
+    prefix?: string
+    sessionTTL?: number
+    ipTTL?: number
+    siteProfileTTL?: number
+  } = {}) {
+    const day = 24 * 60 * 60 * 1000
+    this.storage = storage
+    this.prefix = options.prefix || 'bot-detection'
+    this.sessionTTL = options.sessionTTL || day // 24 hours default
+    this.ipTTL = options.ipTTL || 7 * day // 7 days default
+    this.siteProfileTTL = options.siteProfileTTL || 30 * day // 30 days default
+  }
+
+  /** Session record for `sessionId`, or `null` when absent or older than the session TTL. */
+  async getSession(sessionId: string): Promise<SessionData | null> {
+    return this.readUnlessExpired<SessionData>(this.sessionKey(sessionId), this.sessionTTL)
+  }
+
+  /** Store the session record for `sessionId`. */
+  async setSession(sessionId: string, data: SessionData): Promise<void> {
+    await this.storage.setItem(this.sessionKey(sessionId), data)
+  }
+
+  /** IP record for `ip`, or `null` when absent or older than the IP TTL. */
+  async getIP(ip: string): Promise<IPData | null> {
+    return this.readUnlessExpired<IPData>(this.ipKey(ip), this.ipTTL)
+  }
+
+  /** Store the IP record for `ip`. */
+  async setIP(ip: string, data: IPData): Promise<void> {
+    await this.storage.setItem(this.ipKey(ip), data)
+  }
+
+  /** Site profile exactly as the storage returns it (no TTL is applied). */
+  async getSiteProfile(): Promise<SiteProfile | null> {
+    return await this.storage.getItem<SiteProfile>(`${this.prefix}:site-profile`)
+  }
+
+  /** Store the site profile. */
+  async setSiteProfile(profile: SiteProfile): Promise<void> {
+    await this.storage.setItem(`${this.prefix}:site-profile`, profile)
+  }
+
+  /** Remove every session and IP record whose `lastUpdated` is older than its TTL. */
+  async cleanup(): Promise<void> {
+    const allKeys = await this.storage.getKeys(`${this.prefix}:`)
+    const now = Date.now()
+
+    // Sessions first, then IP records, one key at a time.
+    await this.purgeExpired(allKeys.filter(key => key.includes(':session:')), this.sessionTTL, now)
+    await this.purgeExpired(allKeys.filter(key => key.includes(':ip:')), this.ipTTL, now)
+  }
+
+  private sessionKey(sessionId: string): string {
+    return `${this.prefix}:session:${sessionId}`
+  }
+
+  private ipKey(ip: string): string {
+    return `${this.prefix}:ip:${this.sanitizeIP(ip)}`
+  }
+
+  /** Read `key`; an entry older than `ttl` is deleted and reported as missing. */
+  private async readUnlessExpired<T extends { lastUpdated: number }>(key: string, ttl: number): Promise<T | null> {
+    const record = await this.storage.getItem<T>(key)
+    if (!record) {
+      return null
+    }
+    if (Date.now() - record.lastUpdated > ttl) {
+      await this.storage.removeItem(key)
+      return null
+    }
+    return record
+  }
+
+  /** Delete each of `keys` whose stored `lastUpdated` is more than `ttl` before `now`. */
+  private async purgeExpired(keys: string[], ttl: number, now: number): Promise<void> {
+    for (const key of keys) {
+      const record = await this.storage.getItem<{ lastUpdated: number }>(key)
+      if (record && (now - record.lastUpdated > ttl)) {
+        await this.storage.removeItem(key)
+      }
+    }
+  }
+
+  private sanitizeIP(ip: string): string {
+    // Replace : and . with - for safe key names
+    return ip.replace(/[:.]/g, '-')
+  }
+}
\ No newline at end of file
diff --git a/libs/is-bot/src/adapters/h3.ts b/libs/is-bot/src/adapters/h3.ts
new file mode 100644
index 00000000..05fbd35b
--- /dev/null
+++ b/libs/is-bot/src/adapters/h3.ts
@@ -0,0 +1,131 @@
+// H3/Nuxt adapters for bot detection
+import type { H3Event } from 'h3'
+import { getHeaders, getRequestIP, getResponseStatus, useSession } from 'h3'
+import type {
+ BotDetectionRequest,
+ SessionIdentifier,
+ ResponseStatusProvider
+} from '../types'
+
+/**
+ * Build the framework-agnostic request description from an H3 event.
+ *
+ * The client address is resolved with `xForwardedFor: true` and falls back to
+ * `127.0.0.1` when h3 returns nothing; `timestamp` is the moment of conversion.
+ * NOTE(review): the forwarded header is client-controlled unless a trusted proxy
+ * sets it - confirm the deployment before relying on `ip` for blocking.
+ */
+export function h3ToBotDetectionRequest(event: H3Event): BotDetectionRequest {
+  const requestHeaders = getHeaders(event)
+  const clientIp = getRequestIP(event, { xForwardedFor: true })
+
+  const request: BotDetectionRequest = {
+    path: event.path || '/',
+    method: event.method || 'GET',
+    headers: requestHeaders as BotDetectionRequest['headers'],
+    ip: clientIp || '127.0.0.1',
+    timestamp: Date.now(),
+  }
+  return request
+}
+
+/**
+ * Session identifier that derives a deterministic id from request data.
+ *
+ * No H3 session is involved: the id is `<ip>-<hash of user-agent>`, so every request
+ * from the same address with the same user agent maps to the same id.
+ */
+export class H3SessionIdentifier implements SessionIdentifier {
+  // Accepted for API compatibility; nothing in this class reads it.
+  private sessionPassword: string
+
+  constructor(sessionPassword?: string) {
+    this.sessionPassword = sessionPassword || 'default-bot-detection-password'
+  }
+
+  /**
+   * @param request request whose `ip` and `user-agent` header feed the id
+   * @returns `<ip>-<base36 hash>`; a missing or empty user-agent hashes the empty string
+   */
+  async getSessionId(request: BotDetectionRequest): Promise<string> {
+    const header = request.headers['user-agent']
+    // An empty header array yields `undefined` for `[0]`; treat it like a missing header
+    // instead of letting the hash helper dereference `undefined`.
+    const userAgent = (Array.isArray(header) ? header[0] : header) || ''
+
+    return `${request.ip}-${this.simpleHash(userAgent)}`
+  }
+
+  /** 32-bit string hash (`hash * 31 + charCode`), returned as its absolute value in base 36. */
+  private simpleHash(str: string): string {
+    let hash = 0
+    for (let i = 0; i < str.length; i++) {
+      hash = ((hash << 5) - hash) + str.charCodeAt(i)
+      hash |= 0 // wrap to a signed 32-bit integer
+    }
+    return Math.abs(hash).toString(36)
+  }
+}
+
+/**
+ * Session identifier backed by real H3 sessions.
+ *
+ * `getSessionIdFromEvent()` needs the H3 event and a session password;
+ * `getSessionId()` is the event-less fallback required by `SessionIdentifier` and
+ * derives `<ip>-<hash of user-agent>` from the request alone.
+ */
+export class H3RealSessionIdentifier implements SessionIdentifier {
+  // No built-in fallback: a password shipped in source code is not a secret.
+  private sessionPassword: string | undefined
+
+  constructor(sessionPassword?: string) {
+    this.sessionPassword = sessionPassword
+  }
+
+  /**
+   * Resolve the id of the H3 session attached to `event`.
+   *
+   * @throws Error when no session password was passed to the constructor
+   */
+  // Note: This requires the actual H3Event, so you'd need to modify the interface
+  async getSessionIdFromEvent(event: H3Event): Promise<string | undefined> {
+    if (!this.sessionPassword) {
+      throw new Error('H3RealSessionIdentifier: a session password is required to use H3 sessions')
+    }
+    const session = await useSession(event, {
+      password: this.sessionPassword,
+    })
+    return session.id
+  }
+
+  /**
+   * Deterministic fallback id built from request data only.
+   *
+   * @returns `<ip>-<base36 hash>`; a missing or empty user-agent hashes the empty string
+   */
+  getSessionId(request: BotDetectionRequest): string {
+    const header = request.headers['user-agent']
+    // `[0]` of an empty header array is `undefined`; fall back to '' so hashing cannot throw.
+    const userAgent = (Array.isArray(header) ? header[0] : header) || ''
+
+    return `${request.ip}-${this.simpleHash(userAgent)}`
+  }
+
+  /** 32-bit string hash (`hash * 31 + charCode`), returned as its absolute value in base 36. */
+  private simpleHash(str: string): string {
+    let hash = 0
+    for (let i = 0; i < str.length; i++) {
+      hash = ((hash << 5) - hash) + str.charCodeAt(i)
+      hash |= 0 // wrap to a signed 32-bit integer
+    }
+    return Math.abs(hash).toString(36)
+  }
+}
+
+/**
+ * Reports the response status of the H3 event that belongs to a request.
+ *
+ * Requests are linked to their event through a WeakMap keyed by the request object.
+ */
+export class H3ResponseStatusProvider implements ResponseStatusProvider {
+  private eventMap = new WeakMap<BotDetectionRequest, H3Event>()
+
+  /** Remember which H3 event produced `request` so its status can be read later. */
+  registerEvent(request: BotDetectionRequest, event: H3Event) {
+    this.eventMap.set(request, event)
+  }
+
+  /**
+   * @returns the status h3 reports for the registered event, or `undefined` when
+   *   `request` was never registered or reading the status throws
+   */
+  getStatus(request: BotDetectionRequest): number | undefined {
+    const trackedEvent = this.eventMap.get(request)
+    if (trackedEvent) {
+      try {
+        return getResponseStatus(trackedEvent)
+      }
+      catch {
+        // deliberately ignored: an unreadable status is reported as undefined
+      }
+    }
+    return undefined
+  }
+}
+
+/**
+ * Convert `event` into a `BotDetectionRequest` and register the pair with
+ * `statusProvider`, so the provider can later look up the event for that request.
+ */
+export function createTrackedBotDetectionRequest(
+  event: H3Event,
+  statusProvider: H3ResponseStatusProvider,
+): BotDetectionRequest {
+  const tracked = h3ToBotDetectionRequest(event)
+  statusProvider.registerEvent(tracked, event)
+
+  return tracked
+}
\ No newline at end of file
diff --git a/libs/is-bot/src/adapters/memory.ts b/libs/is-bot/src/adapters/memory.ts
new file mode 100644
index 00000000..ca965f96
--- /dev/null
+++ b/libs/is-bot/src/adapters/memory.ts
@@ -0,0 +1,90 @@
+// In-memory storage adapter for bot detection (for testing/development)
+import type { BotDetectionStorage, SessionData, IPData, SiteProfile } from '../types'
+
+/**
+ * In-memory `BotDetectionStorage` for tests and local development.
+ *
+ * Session and IP records live in two Maps and share a single TTL (milliseconds);
+ * a record whose `lastUpdated` is older than the TTL is dropped on read or in `cleanup()`.
+ */
+export class MemoryAdapter implements BotDetectionStorage {
+  private sessions = new Map<string, SessionData>()
+  private ips = new Map<string, IPData>()
+  private siteProfile: SiteProfile | null = null
+  private ttl: number
+
+  /** @param options `ttl` in milliseconds; a missing or falsy value means 24 hours */
+  constructor(options: { ttl?: number } = {}) {
+    const oneDay = 24 * 60 * 60 * 1000
+    this.ttl = options.ttl || oneDay
+  }
+
+  /** Session record for `sessionId`, or `null` when absent or expired. */
+  async getSession(sessionId: string): Promise<SessionData | null> {
+    return this.readLive(this.sessions, sessionId)
+  }
+
+  /** Store the session record for `sessionId`. */
+  async setSession(sessionId: string, data: SessionData): Promise<void> {
+    this.sessions.set(sessionId, data)
+  }
+
+  /** IP record for `ip`, or `null` when absent or expired. */
+  async getIP(ip: string): Promise<IPData | null> {
+    return this.readLive(this.ips, ip)
+  }
+
+  /** Store the IP record for `ip`. */
+  async setIP(ip: string, data: IPData): Promise<void> {
+    this.ips.set(ip, data)
+  }
+
+  /** The stored site profile, or `null` when none was set. */
+  async getSiteProfile(): Promise<SiteProfile | null> {
+    return this.siteProfile
+  }
+
+  /** Replace the stored site profile. */
+  async setSiteProfile(profile: SiteProfile): Promise<void> {
+    this.siteProfile = profile
+  }
+
+  /** Drop every session and IP record older than the TTL. */
+  async cleanup(): Promise<void> {
+    const now = Date.now()
+    const expired = (record: { lastUpdated: number }) => now - record.lastUpdated > this.ttl
+
+    this.sessions.forEach((record, sessionId) => {
+      if (expired(record)) {
+        this.sessions.delete(sessionId)
+      }
+    })
+    this.ips.forEach((record, ip) => {
+      if (expired(record)) {
+        this.ips.delete(ip)
+      }
+    })
+  }
+
+  // Development helpers
+  /** Current number of stored sessions and IP records, and whether a site profile is set. */
+  getStats() {
+    const hasSiteProfile = !!this.siteProfile
+    return { sessions: this.sessions.size, ips: this.ips.size, hasSiteProfile }
+  }
+
+  /** Forget everything. */
+  clear() {
+    this.siteProfile = null
+    this.ips.clear()
+    this.sessions.clear()
+  }
+
+  /** Look up `id` in `bucket`; an entry past the TTL is deleted and reported as missing. */
+  private readLive<T extends { lastUpdated: number }>(bucket: Map<string, T>, id: string): T | null {
+    const entry = bucket.get(id)
+    if (!entry) {
+      return null
+    }
+    if (Date.now() - entry.lastUpdated > this.ttl) {
+      bucket.delete(id)
+      return null
+    }
+    return entry
+  }
+}
\ No newline at end of file
diff --git a/libs/is-bot/src/adapters/unstorage.ts b/libs/is-bot/src/adapters/unstorage.ts
new file mode 100644
index 00000000..83707ed0
--- /dev/null
+++ b/libs/is-bot/src/adapters/unstorage.ts
@@ -0,0 +1,134 @@
+// Unstorage adapter for bot detection storage
+import type { Storage } from 'unstorage'
+import type { BotDetectionStorage, SessionData, IPData, SiteProfile } from '../types'
+
+/**
+ * `BotDetectionStorage` implementation backed by an unstorage `Storage` instance.
+ *
+ * Keys are laid out as `<prefix>:session:<sessionId>`, `<prefix>:ip:<sanitized ip>` and
+ * `<prefix>:site-profile`. Session and IP records expire once their `lastUpdated`
+ * timestamp is older than the matching TTL; the site profile's `Set`/`Map` members are
+ * converted to an array / plain object on write and rebuilt on read.
+ */
+export class UnstorageAdapter implements BotDetectionStorage {
+  private storage: Storage
+  private prefix: string
+  private sessionTTL: number
+  private ipTTL: number
+  // Stored from the options; no method of this class reads it.
+  private siteProfileTTL: number
+
+  /**
+   * @param storage unstorage instance that holds the records
+   * @param options key prefix and TTLs in milliseconds. `ttl` is a shared fallback used
+   *   when a specific TTL is missing; falsy values fall back to the next default
+   */
+  constructor(storage: Storage, options: {
+    prefix?: string
+    ttl?: number
+    sessionTTL?: number
+    ipTTL?: number
+    siteProfileTTL?: number
+  } = {}) {
+    this.storage = storage
+    this.prefix = options.prefix || 'bot-detection'
+    this.sessionTTL = options.sessionTTL || options.ttl || 24 * 60 * 60 * 1000 // 24 hours default
+    this.ipTTL = options.ipTTL || options.ttl || 7 * 24 * 60 * 60 * 1000 // 7 days default
+    this.siteProfileTTL = options.siteProfileTTL || options.ttl || 30 * 24 * 60 * 60 * 1000 // 30 days default
+  }
+
+  /** Session record for `sessionId`, or `null` when absent or older than the session TTL. */
+  async getSession(sessionId: string): Promise<SessionData | null> {
+    const key = `${this.prefix}:session:${sessionId}`
+    return this.readUnlessExpired<SessionData>(key, this.sessionTTL)
+  }
+
+  /** Store the session record for `sessionId`. */
+  async setSession(sessionId: string, data: SessionData): Promise<void> {
+    const key = `${this.prefix}:session:${sessionId}`
+    await this.storage.setItem(key, data)
+  }
+
+  /**
+   * IP record for `ip`, or `null` when absent or older than the IP TTL.
+   *
+   * An expired record is removed regardless of its `activeSessions`: this adapter cannot
+   * tell which of those sessions are still alive, and `cleanup()` applies the same
+   * TTL-only rule.
+   */
+  async getIP(ip: string): Promise<IPData | null> {
+    const key = `${this.prefix}:ip:${this.sanitizeIP(ip)}`
+    return this.readUnlessExpired<IPData>(key, this.ipTTL)
+  }
+
+  /** Store the IP record for `ip`. */
+  async setIP(ip: string, data: IPData): Promise<void> {
+    const key = `${this.prefix}:ip:${this.sanitizeIP(ip)}`
+    await this.storage.setItem(key, data)
+  }
+
+  /** Site profile with `existingPaths` / `userAgentPatterns` rebuilt as `Set` / `Map`, or `null`. */
+  async getSiteProfile(): Promise<SiteProfile | null> {
+    const key = `${this.prefix}:site-profile`
+    const data = await this.storage.getItem<SiteProfile>(key)
+
+    if (!data) {
+      return null
+    }
+
+    // setSiteProfile() writes the Set as an array; turn it back into a Set.
+    if (data.existingPaths && Array.isArray(data.existingPaths)) {
+      data.existingPaths = new Set(data.existingPaths as any)
+    }
+
+    // Likewise the Map is written as a plain object. A value that already is a Map is
+    // left alone: Object.entries() on a Map is empty and would wipe it.
+    if (
+      data.userAgentPatterns
+      && typeof data.userAgentPatterns === 'object'
+      && !(data.userAgentPatterns instanceof Map)
+    ) {
+      data.userAgentPatterns = new Map(Object.entries(data.userAgentPatterns as any))
+    }
+
+    return data
+  }
+
+  /** Store the site profile, converting its `Set` and `Map` members to JSON-friendly values. */
+  async setSiteProfile(profile: SiteProfile): Promise<void> {
+    const key = `${this.prefix}:site-profile`
+
+    // Serialize Set and Map objects for storage
+    const serializable = {
+      ...profile,
+      existingPaths: Array.from(profile.existingPaths),
+      userAgentPatterns: Object.fromEntries(profile.userAgentPatterns),
+    }
+
+    await this.storage.setItem(key, serializable)
+  }
+
+  /** Remove every session and IP record whose `lastUpdated` is older than its TTL. */
+  async cleanup(): Promise<void> {
+    // Get all keys with our prefix
+    const keys = await this.storage.getKeys(`${this.prefix}:`)
+    const now = Date.now()
+
+    await this.purgeExpired(keys.filter(key => key.includes(':session:')), this.sessionTTL, now)
+    await this.purgeExpired(keys.filter(key => key.includes(':ip:')), this.ipTTL, now)
+  }
+
+  /** Read `key`; an entry older than `ttl` is deleted and reported as missing. */
+  private async readUnlessExpired<T extends { lastUpdated: number }>(key: string, ttl: number): Promise<T | null> {
+    const record = await this.storage.getItem<T>(key)
+    if (!record) {
+      return null
+    }
+    if (Date.now() - record.lastUpdated > ttl) {
+      await this.storage.removeItem(key)
+      return null
+    }
+    return record
+  }
+
+  /** Delete each of `keys` whose stored `lastUpdated` is more than `ttl` before `now`. */
+  private async purgeExpired(keys: string[], ttl: number, now: number): Promise<void> {
+    for (const key of keys) {
+      const record = await this.storage.getItem<{ lastUpdated: number }>(key)
+      if (record && (now - record.lastUpdated > ttl)) {
+        await this.storage.removeItem(key)
+      }
+    }
+  }
+
+  private sanitizeIP(ip: string): string {
+    // Replace : and . with - for safe key names
+    return ip.replace(/[:.]/g, '-')
+  }
+}
\ No newline at end of file
diff --git a/libs/is-bot/src/behavior.ts b/libs/is-bot/src/behavior.ts
new file mode 100644
index 00000000..54a4ee71
--- /dev/null
+++ b/libs/is-bot/src/behavior.ts
@@ -0,0 +1,843 @@
+// Bot detection behavior analysis - framework agnostic
+
+// Request paths commonly probed by scanners and exploit bots (CMS logins, dotfiles,
+// admin consoles, version-control folders, ...).
+// The array is mutable on purpose: addCustomSensitivePaths() appends to it at runtime.
+export const SENSITIVE_PATHS = [
+ '/wp-login',
+ '/xmlrpc.php',
+ '/.env',
+ '/phpmyadmin',
+ '/setup',
+ '/install',
+ '/config',
+ '/.git',
+ '/.svn',
+ '/api/graphql',
+ '/graphql',
+ // Additional common bot targets
+ '/wp-content',
+ '/wp-includes',
+ '/wp-json',
+ '/.well-known/security.txt',
+ '/vendor/',
+ '/server-status',
+ '/solr/',
+ '/jenkins/',
+ '/.DS_Store',
+ '/actuator/',
+ '/console/',
+ '/wp-admin',
+ '/admin-login.php',
+ '/wp-login-hidden',
+]
+
+// Honeypot/high-sensitivity paths - these could be legitimate in some cases
+// but are frequently targeted by bots and rarely used by regular users.
+// isMaybeSensitivePath() matches these by substring, so '/admin' also matches
+// '/administrator', '/admin_area' or any path that merely contains '/admin'.
+export const MAYBE_SENSITIVE_PATHS = [
+ '/admin',
+ '/login',
+ '/administrator',
+ '/includes/config',
+ '/.hidden-login',
+ '/robots.txt.bak',
+ '/administrator/index.php',
+ '/myadmin',
+ '/admin_area',
+ '/panel',
+ '/cpanel',
+ '/dashboard',
+]
+
+// Score cut-offs used to classify a client, from most to least bot-like.
+// The object is mutable: setBotScoreThresholds() overwrites DEFINITELY_BOT,
+// LIKELY_BOT and SUSPICIOUS in place; the two *_HUMAN values have no setter.
+export const BOT_SCORE_THRESHOLDS = {
+ DEFINITELY_BOT: 90,
+ LIKELY_BOT: 70,
+ SUSPICIOUS: 40,
+ PROBABLY_HUMAN: 20,
+ DEFINITELY_HUMAN: 5,
+}
+
+// Configuration setter functions for external framework integration
+
+/**
+ * Override selected bot-score thresholds at runtime.
+ *
+ * Only the properties that are present (not `undefined`) are written to the shared
+ * `BOT_SCORE_THRESHOLDS` object; everything else keeps its current value.
+ */
+export function setBotScoreThresholds(thresholds: {
+  definitelyBot?: number
+  likelyBot?: number
+  suspicious?: number
+}) {
+  const { definitelyBot, likelyBot, suspicious } = thresholds
+
+  if (definitelyBot !== undefined)
+    BOT_SCORE_THRESHOLDS.DEFINITELY_BOT = definitelyBot
+
+  if (likelyBot !== undefined)
+    BOT_SCORE_THRESHOLDS.LIKELY_BOT = likelyBot
+
+  if (suspicious !== undefined)
+    BOT_SCORE_THRESHOLDS.SUSPICIOUS = suspicious
+}
+
+/**
+ * Register additional paths in the shared `SENSITIVE_PATHS` list.
+ *
+ * Paths that are already listed are skipped, so calling this repeatedly with the same
+ * input (for example on every module setup) does not keep growing the array.
+ *
+ * @param paths paths to add; the argument itself is not modified
+ */
+export function addCustomSensitivePaths(paths: string[]) {
+  for (const path of paths) {
+    if (!SENSITIVE_PATHS.includes(path)) {
+      SENSITIVE_PATHS.push(path)
+    }
+  }
+}
+
+// Score contribution of each suspicious behavior; a larger number is a stronger bot signal.
+// NOTE(review): how these are combined into the final score is not visible here - see the scoring code.
+export const BEHAVIOR_WEIGHTS = {
+ SENSITIVE_PATH: 15, // Accessing known sensitive paths
+ MAYBE_SENSITIVE_PATH: 5, // Accessing potentially sensitive paths (honeypot/admin areas)
+ RAPID_REQUESTS: 20, // Too many requests in short time
+ REPEATED_ERRORS: 15, // Repeated 404s or errors
+ UNUSUAL_PATTERN: 25, // Unusual access pattern
+ NONEXISTENT_RESOURCES: 10, // Requesting resources that don't exist
+ REQUEST_CONSISTENCY: 20, // Consistency in request patterns
+ MULTIPLE_SENSITIVE_HITS: 40, // Multiple hits to different sensitive paths
+ RESOURCE_TIMING: 25, // Abnormal timing between resource requests (raised from 15 to penalise timing anomalies harder)
+ SESSION_ANOMALY: 30, // Suspicious session behavior
+}
+
+// Traffic classification - helps distinguish between different user types.
+// The string values are what ends up in stored session data; note that
+// SUSPICIOUS is stored as 'suspicious_bot', not 'suspicious'.
+export enum TrafficType {
+ REGULAR_USER = 'regular_user',
+ SUSPICIOUS = 'suspicious_bot',
+ MALICIOUS_BOT = 'malicious_bot',
+ UNKNOWN = 'unknown',
+}
+
+// Per-session record: recent requests plus the counters and derived indicators
+// that the behavior analysis accumulates for one session.
+export interface SessionData {
+ // Recent requests of this session (presumably a bounded window - confirm where entries are pushed)
+ lastRequests: Array<{
+ timestamp: number
+ path: string
+ status?: number
+ timeSincePrevious?: number
+ method?: string
+ }>
+ suspiciousPathHits: number
+ maybeSensitivePathHits: number
+ uniqueSensitivePathsAccessed: string[] // Track unique sensitive paths accessed
+ errorCount: number
+ score: number
+ lastScore: number
+ // Compared with Date.now() by the storage adapters to decide whether the record has expired
+ lastUpdated: number
+ trafficType: TrafficType
+ knownGoodActions: number // Count of actions that indicate human behavior
+ tempExemptUntil?: number // Timestamp for temporary exemption
+ requestMethodVariety: string[] // Array of used HTTP methods
+ averageTimeBetweenRequests?: number
+ requestSequenceEntropy: number // Measure of randomness in request sequence
+ firstSeenAt: number // When the session was first created
+ behaviorChangePoints?: number[] // Timestamps where behavior significantly changed
+}
+
+// Aggregated record kept per client IP address.
+export interface IPData {
+ sessionCount: number
+ activeSessions: string[] // Track active session IDs
+ suspiciousScore: number
+ // Compared with Date.now() by the storage adapters to decide whether the record has expired
+ lastUpdated: number
+ legitSessionsCount: number // Count of sessions that passed human verification
+ sessionsPerHour?: number // Rate of new sessions creation
+ lastSessionCreated?: number // Timestamp of the last session created
+ isBot?: boolean
+ isBotConfidence?: number
+ details?: { name: string, type: string, trusted?: boolean } | null
+ // NOTE(review): `factores` looks like a misspelling of `factors`; renaming it changes
+ // the stored shape, so it has to be coordinated with every reader and writer.
+ factores: string[] // List of factors that contributed to the score
+}
+
+/**
+ * Whether `path` contains any entry of `MAYBE_SENSITIVE_PATHS`.
+ *
+ * The comparison is a plain substring test, not a prefix or exact match.
+ */
+export function isMaybeSensitivePath(path: string): boolean {
+  for (const candidate of MAYBE_SENSITIVE_PATHS) {
+    if (path.includes(candidate)) {
+      return true
+    }
+  }
+  return false
+}
+
+/**
+ * Shannon entropy (in bits) of the distribution of paths in a request sequence.
+ *
+ * Low entropy means the same few paths are requested over and over; higher entropy
+ * means the requests are spread over many different paths.
+ *
+ * @param paths requested paths, one entry per request
+ * @returns 0 for fewer than 3 paths or when every path is identical, otherwise the
+ *   sum of `-p * log2(p)` over the distinct paths
+ */
+function calculateRequestEntropy(paths: string[]): number {
+  if (paths.length < 3) {
+    return 0
+  }
+
+  // A Map rather than a plain object: paths such as `constructor` or `__proto__` would
+  // otherwise hit inherited Object.prototype members and turn the counts into NaN.
+  const pathCounts = new Map<string, number>()
+  for (const path of paths) {
+    pathCounts.set(path, (pathCounts.get(path) ?? 0) + 1)
+  }
+
+  let entropy = 0
+  for (const count of pathCounts.values()) {
+    const probability = count / paths.length
+    entropy -= probability * Math.log2(probability)
+  }
+
+  return entropy
+}
+
+/**
+ * Heuristically decide whether a series of requests looks like human browsing.
+ *
+ * With fewer than 5 requests there is not enough signal and the sequence is reported
+ * as natural. Otherwise four automation indicators are counted on the chronologically
+ * sorted requests: (A) paths never decrease in lexical order, (B) at least 80% of the
+ * paths share their first segment, (C) the first number of every path advances by a
+ * constant step, (D) path lengths are almost identical.
+ *
+ * @param requests requests to inspect, in any order; the array is not mutated
+ * @returns `entropy` - Shannon entropy of the paths in bits; `timeConsistency` - variance
+ *   of the gaps between requests divided by the squared mean gap, capped at 1 (0 means
+ *   perfectly regular timing); `isNaturalBrowsing` - true when entropy > 1.5 and
+ *   timeConsistency > 0.2, or when none of the four indicators fired
+ */
+function analyzeRequestSequence(requests: Array<{ path: string, timestamp: number }>): {
+  isNaturalBrowsing: boolean
+  entropy: number
+  timeConsistency: number
+} {
+  // From here on at least 5 requests are guaranteed, which is why the indicator checks
+  // below need no extra minimum-length guards.
+  if (requests.length < 5) {
+    return { isNaturalBrowsing: true, entropy: 0, timeConsistency: 1 }
+  }
+
+  // Oldest-to-newest copy; the caller's array is left untouched.
+  const sortedRequests = [...requests].sort((a, b) => a.timestamp - b.timestamp)
+
+  // 1. Variation of the time gaps between consecutive requests
+  const intervals: number[] = []
+  for (let i = 1; i < sortedRequests.length; i++) {
+    intervals.push(sortedRequests[i].timestamp - sortedRequests[i - 1].timestamp)
+  }
+  const avgInterval = intervals.reduce((sum, val) => sum + val, 0) / intervals.length
+  const variance = intervals.reduce((sum, val) => sum + (val - avgInterval) ** 2, 0) / intervals.length
+
+  // Natural browsing has some variance in timing. When every request carries the same
+  // timestamp the mean gap is 0 and the ratio would be 0/0 = NaN; such a burst is
+  // perfectly regular, so report 0.
+  const timeConsistency = avgInterval > 0
+    ? Math.min(1, variance / (avgInterval * avgInterval))
+    : 0
+
+  // 2. Path entropy
+  const paths = requests.map(r => r.path)
+  const entropy = calculateRequestEntropy(paths)
+
+  // 3. Indicators of unnatural browsing patterns
+  let suspiciousPatternCount = 0
+  const pathsInOrder = sortedRequests.map(r => r.path)
+
+  // A. Alphabetical / sequential scanning: no path sorts before its predecessor
+  const sequentialOrdering = pathsInOrder.every(
+    (path, i) => i === 0 || path.localeCompare(pathsInOrder[i - 1]) >= 0,
+  )
+  if (sequentialOrdering) {
+    suspiciousPatternCount++
+  }
+
+  // B. Crawling similar endpoints: 80% or more of the requests share the first path
+  // segment (e.g. "/admin" for "/admin/users")
+  const prefixCounts = new Map<string, number>()
+  for (const path of paths) {
+    const prefix = `/${path.split('/')[1]}`
+    prefixCounts.set(prefix, (prefixCounts.get(prefix) || 0) + 1)
+  }
+  const dominantShare = Math.ceil(paths.length * 0.8)
+  if ([...prefixCounts.values()].some(count => count >= dominantShare)) {
+    suspiciousPatternCount++
+  }
+
+  // C. Id scanning: every path contains a number and the first number of each path
+  // changes by the same amount from one request to the next
+  const numericValues: number[] = []
+  for (const path of pathsInOrder) {
+    const firstNumber = path.match(/\d+/)
+    if (!firstNumber) {
+      break
+    }
+    numericValues.push(Number.parseInt(firstNumber[0], 10))
+  }
+  if (numericValues.length === pathsInOrder.length) {
+    const step = numericValues[1] - numericValues[0]
+    const constantStep = numericValues.every(
+      (value, i) => i === 0 || value - numericValues[i - 1] === step,
+    )
+    if (constantStep) {
+      suspiciousPatternCount++
+    }
+  }
+
+  // D. Very uniform path lengths (standard deviation below 10% of the mean) hint at
+  // generated URLs
+  const pathLengths = paths.map(p => p.length)
+  const avgLength = pathLengths.reduce((sum, len) => sum + len, 0) / pathLengths.length
+  const lengthVariance = pathLengths.reduce((sum, len) => sum + (len - avgLength) ** 2, 0) / pathLengths.length
+  if (Math.sqrt(lengthVariance) / avgLength < 0.1) {
+    suspiciousPatternCount++
+  }
+
+  // Natural when the paths are varied and the timing is irregular enough, or when no
+  // indicator fired at all
+  const isNaturalBrowsing = (entropy > 1.5 && timeConsistency > 0.2)
+    || suspiciousPatternCount === 0
+
+  return {
+    isNaturalBrowsing,
+    entropy,
+    timeConsistency,
+  }
+}
+
+/**
+ * Computes the per-minute request budget for a session from its history.
+ *
+ * - Default budget: 15 requests per minute.
+ * - Sessions currently classified as regular users get 30.
+ * - Otherwise every suspicious path hit removes 2, never dropping below 5.
+ * - More than 5 known good actions add a bonus equal to that count, capped at 20.
+ *
+ * @param sessionData - Session state; reads `trafficType`, `suspiciousPathHits` and `knownGoodActions`.
+ * @returns The number of requests allowed per minute.
+ */
+function calculateRateLimit(sessionData: SessionData): number {
+  const { trafficType, suspiciousPathHits, knownGoodActions } = sessionData
+
+  // Base budget: regular users win over the suspicious-path penalty,
+  // which only applies to sessions that are not classified as regular.
+  let baseLimit: number
+  if (trafficType === TrafficType.REGULAR_USER) {
+    baseLimit = 30
+  }
+  else if (suspiciousPathHits > 0) {
+    baseLimit = Math.max(5, 15 - suspiciousPathHits * 2)
+  }
+  else {
+    baseLimit = 15
+  }
+
+  // Sessions with a good track record get extra headroom on top of the base.
+  const goodBehaviorBonus = knownGoodActions > 5 ? Math.min(20, knownGoodActions) : 0
+
+  return baseLimit + goodBehaviorBonus
+}
+
+/**
+ * Looks for signs of session hijacking or cookie theft.
+ *
+ * Checks, in order (the first match wins):
+ * 1. `MANY_SESSIONS` - the IP has more than 10 active sessions.
+ * 2. `RAPID_SESSION_CREATION` - the IP creates more than 5 sessions per hour.
+ * 3. `BEHAVIOR_CHANGE` - a session older than 24 hours whose 5 most recent
+ *    requests hit sensitive paths at more than 3x the rate of its older
+ *    requests (or whose older requests had no such hits at all).
+ *
+ * Side effect: on `BEHAVIOR_CHANGE`, `timestamp` is appended to
+ * `sessionData.behaviorChangePoints` (the array is created if missing).
+ *
+ * @param ipData - IP state; reads `activeSessions` and `sessionsPerHour`.
+ * @param sessionData - Session state; reads `firstSeenAt`, `suspiciousPathHits` and `lastRequests`.
+ * @param timestamp - Current time in ms; defaults to `Date.now()`.
+ * @returns `suspicious` flag, a `severity` score and a `reason` code (`''` when nothing was found).
+ */
+function detectSessionAnomaly(ipData: IPData, sessionData: SessionData, timestamp: number = Date.now()): {
+  suspicious: boolean
+  severity: number
+  reason?: string
+} {
+  const result = {
+    suspicious: false,
+    severity: 0,
+    reason: '',
+  }
+
+  const SESSION_AGE_THRESHOLD = 24 * 60 * 60 * 1000 // 24 hours in milliseconds
+  const RECENT_WINDOW = 5 // number of latest requests treated as "recent"
+  const now = timestamp
+
+  // Check for IP with many sessions
+  if (ipData.activeSessions.length > 10) {
+    result.suspicious = true
+    result.severity = Math.min(70, ipData.activeSessions.length * 5)
+    result.reason = 'MANY_SESSIONS'
+    return result
+  }
+
+  // Check for high session creation rate
+  if (ipData.sessionsPerHour && ipData.sessionsPerHour > 5) {
+    result.suspicious = true
+    result.severity = Math.min(60, ipData.sessionsPerHour * 10)
+    result.reason = 'RAPID_SESSION_CREATION'
+    return result
+  }
+
+  // Check for abrupt behavior changes in old sessions
+  if (sessionData.firstSeenAt && (now - sessionData.firstSeenAt > SESSION_AGE_THRESHOLD)) {
+    // Old session with sudden suspicious activity
+    if (sessionData.suspiciousPathHits > 0 && sessionData.lastRequests.length > RECENT_WINDOW) {
+      const touchesSensitivePath = (requestPath: string) =>
+        SENSITIVE_PATHS.some(sp => requestPath.includes(sp))
+        || MAYBE_SENSITIVE_PATHS.some(sp => requestPath.includes(sp))
+
+      // Split the history; both halves are non-empty because length > RECENT_WINDOW
+      const recentRequests = sessionData.lastRequests.slice(-RECENT_WINDOW)
+      const olderRequests = sessionData.lastRequests.slice(0, -RECENT_WINDOW)
+
+      const recentSuspicious = recentRequests.filter(r => touchesSensitivePath(r.path)).length
+      const olderSuspicious = olderRequests.filter(r => touchesSensitivePath(r.path)).length
+
+      // Compare hit *rates*: each count is normalized by the size of its own
+      // window, otherwise a long clean-ish history masks a fully suspicious
+      // recent window.
+      const recentRate = recentSuspicious / recentRequests.length
+      const olderRate = olderSuspicious / olderRequests.length
+
+      // If recent behavior is much more suspicious than older behavior
+      if (recentSuspicious > 0 && (olderSuspicious === 0 || recentRate > olderRate * 3)) {
+        result.suspicious = true
+        result.severity = 50
+        result.reason = 'BEHAVIOR_CHANGE'
+
+        // Mark this point as a behavior change point
+        if (!sessionData.behaviorChangePoints) {
+          sessionData.behaviorChangePoints = []
+        }
+        sessionData.behaviorChangePoints.push(now)
+      }
+    }
+  }
+
+  return result
+}
+
+/**
+ * One scoring signal recorded while a request is analyzed; instances are
+ * built by the `addFactor` helper inside `analyzeSessionAndIpBehavior`.
+ */
+export interface DetectionFactor {
+ // Factor identifier, e.g. 'SENSITIVE_PATH' or 'RAPID_REQUESTS'
+ type: string
+ // Score contribution attributed to this factor
+ weight: number
+ // Free-form supporting data (path, hit counts, rate numbers, ...)
+ evidence: any
+ // Timestamp of the analyzed request, in milliseconds
+ timestamp: number
+ // Human-readable explanation; also used to build the debug reasoning list
+ description: string
+}
+
+/**
+ * Diagnostic snapshot attached to `BotDetectionBehavior.debug` when
+ * `analyzeSessionAndIpBehavior` is called with `debug: true`.
+ */
+export interface DebugInfo {
+ // Time elapsed since the session's `firstSeenAt`, in milliseconds
+ sessionAge: number
+ // Number of requests currently kept in the session history
+ requestCount: number
+ // Paths of the requests in the session history, in stored order
+ pathHistory: string[]
+ timingAnalysis: {
+ // Mean gap between consecutive requests in the history, in milliseconds
+ avgInterval: number
+ consistency: number
+ // Request sequence entropy stored on the session
+ entropy: number
+ }
+ // Factors recorded for the analyzed request
+ factors: DetectionFactor[]
+ ipInfo: {
+ sessionCount: number
+ // The IP's current suspicious score
+ totalScore: number
+ isBlocked: boolean
+ isTrusted: boolean
+ }
+ // Bot confidence copied from the IP data (0 when unset)
+ confidence: number
+ // One line per recorded factor: "<type>: <description> (weight: <n>)"
+ reasoning: string[]
+ // NOTE(review): the two fields below are not populated by the analyzer in this file - confirm their producers
+ enhancedAnalysis?: any
+ modularAnalysis?: any
+}
+
+/**
+ * Bundle of per-session and per-IP state that the analyzers read and mutate.
+ */
+export interface BotDetectionBehavior {
+ // Session identifier; this is the value tracked in `ip.activeSessions`
+ id: string
+ session: SessionData
+ ip: IPData
+ // NOTE(review): presumably marks state that needs persisting - not read or written in this file's analyzers, confirm with the storage layer
+ dirty?: boolean
+ // Filled in by `analyzeSessionAndIpBehavior` when called with `debug: true`
+ debug?: DebugInfo
+}
+
+/**
+ * Scores one incoming request against the session and IP history held in
+ * `behavior`, mutating `behavior.session` and `behavior.ip` in place.
+ *
+ * Steps, in order: maybe-sensitive path tracking, request history update
+ * (last 30 requests), hourly score decay, session-to-IP association and
+ * session creation rate, sensitive path hits, adaptive rate limiting,
+ * request sequence analysis, session anomaly detection and request interval
+ * consistency. The collected weights are added to the session score, which
+ * then drives `trafficType`, the IP `suspiciousScore`, `isBot` and
+ * `isBotConfidence`.
+ *
+ * @param options.request - Path and method of the request being analyzed.
+ * @param options.behavior - Session/IP state to read and update.
+ * @param options.timestamp - Time of the request in ms; defaults to `Date.now()`.
+ * @param options.debug - When true, `behavior.debug` is populated with the
+ * recorded factors, reasoning strings and a timing summary.
+ */
+export function analyzeSessionAndIpBehavior({
+  request,
+  behavior,
+  timestamp = Date.now(),
+  debug = false,
+}: {
+  request: { path: string; method: string }
+  behavior: BotDetectionBehavior
+  timestamp?: number
+  debug?: boolean
+}) {
+  // Configuration should be set externally via setBotScoreThresholds()
+
+  const MS_PER_HOUR = 1000 * 60 * 60
+  // Smallest gap (1 second) used when deriving a sessions-per-hour rate, so
+  // that sessions created at the same (or an earlier) timestamp cannot yield
+  // an infinite or negative rate.
+  const MIN_SESSION_GAP_HOURS = 1 / 3600
+
+  const path = request.path || ''
+  const method = request.method || 'GET'
+  // Check if this is a maybe-sensitive path
+  const isMaybeSensitive = isMaybeSensitivePath(path)
+  const now = timestamp
+
+  const sessionData: SessionData = behavior.session
+  const ipData: IPData = behavior.ip
+
+  // Debug tracking: every factor goes through addFactor so that the debug
+  // output lists everything that contributed to the score.
+  const detectionFactors: DetectionFactor[] = []
+  const reasoning: string[] = []
+
+  function addFactor(type: string, weight: number, evidence: any, description: string) {
+    const factor: DetectionFactor = {
+      type,
+      weight,
+      evidence,
+      timestamp: now,
+      description,
+    }
+    detectionFactors.push(factor)
+    if (debug) {
+      reasoning.push(`${type}: ${description} (weight: ${weight})`)
+    }
+    return weight
+  }
+
+  // Score contributions of this request, keyed by factor type
+  const scoreFactors: Record<string, number> = {}
+
+  // Time-variance measure from the sequence analysis, kept for the debug output
+  let sequenceTimeConsistency: number | undefined
+
+  // Check for maybe-sensitive path access
+  if (isMaybeSensitive) {
+    sessionData.maybeSensitivePathHits = (sessionData.maybeSensitivePathHits || 0) + 1
+
+    // Track unique sensitive paths for detecting scanning behavior
+    sessionData.uniqueSensitivePathsAccessed = sessionData.uniqueSensitivePathsAccessed || []
+    if (!sessionData.uniqueSensitivePathsAccessed.includes(path)) {
+      sessionData.uniqueSensitivePathsAccessed.push(path)
+    }
+
+    // Apply score - smaller penalty for first hit, larger for repeated behavior
+    if (sessionData.maybeSensitivePathHits === 1) {
+      scoreFactors.MAYBE_SENSITIVE_PATH = addFactor(
+        'MAYBE_SENSITIVE_PATH',
+        BEHAVIOR_WEIGHTS.MAYBE_SENSITIVE_PATH,
+        { path, hitCount: 1 },
+        `First access to potentially sensitive path: ${path}`,
+      )
+    }
+    else if (sessionData.maybeSensitivePathHits > 1) {
+      // Multiple hits to sensitive paths is more suspicious (capped at 3x)
+      const weight = BEHAVIOR_WEIGHTS.MAYBE_SENSITIVE_PATH * Math.min(3, sessionData.maybeSensitivePathHits)
+      scoreFactors.MAYBE_SENSITIVE_PATH = addFactor(
+        'MAYBE_SENSITIVE_PATH',
+        weight,
+        { path, hitCount: sessionData.maybeSensitivePathHits },
+        `Multiple access to sensitive paths (${sessionData.maybeSensitivePathHits} hits)`,
+      )
+
+      // If they hit multiple different sensitive paths, that's even more suspicious
+      if (sessionData.uniqueSensitivePathsAccessed.length >= 2) {
+        scoreFactors.MULTIPLE_SENSITIVE_HITS = addFactor(
+          'MULTIPLE_SENSITIVE_HITS',
+          BEHAVIOR_WEIGHTS.MULTIPLE_SENSITIVE_HITS,
+          { uniquePaths: sessionData.uniqueSensitivePathsAccessed },
+          `Scanning behavior: ${sessionData.uniqueSensitivePathsAccessed.length} different sensitive paths`,
+        )
+      }
+    }
+  }
+
+  // Calculate time since previous request if applicable
+  let timeSincePrevious = 0
+  if (sessionData.lastRequests.length > 0) {
+    timeSincePrevious = now - sessionData.lastRequests[sessionData.lastRequests.length - 1].timestamp
+  }
+
+  // Track this request
+  sessionData.lastRequests.push({
+    timestamp: now,
+    path,
+    method,
+    timeSincePrevious,
+  })
+
+  // Track HTTP method variety
+  if (!sessionData.requestMethodVariety.includes(method)) {
+    sessionData.requestMethodVariety.push(method)
+  }
+
+  // Only keep last 30 requests for pattern analysis
+  if (sessionData.lastRequests.length > 30) {
+    sessionData.lastRequests.shift()
+  }
+
+  // Apply time decay to previous scores (reduce by ~10% per hour)
+  const hoursSinceLastUpdate = (now - sessionData.lastUpdated) / MS_PER_HOUR
+  if (hoursSinceLastUpdate > 0) {
+    sessionData.score = Math.max(0, sessionData.score * 0.9 ** hoursSinceLastUpdate)
+  }
+
+  // Associate this session with the IP if not already tracked
+  if (!ipData.activeSessions.includes(behavior.id)) {
+    ipData.activeSessions.push(behavior.id)
+    ipData.sessionCount = ipData.activeSessions.length
+
+    // Calculate session creation rate
+    if (ipData.lastSessionCreated) {
+      const hoursSinceLastSession = (now - ipData.lastSessionCreated) / MS_PER_HOUR
+
+      if (hoursSinceLastSession < 1) {
+        // Creating sessions more than once per hour: track the rate.
+        // The gap is floored so a zero/negative gap stays a finite, positive rate.
+        const instantaneousRate = 1 / Math.max(hoursSinceLastSession, MIN_SESSION_GAP_HOURS)
+        ipData.sessionsPerHour = ipData.sessionsPerHour
+          ? (ipData.sessionsPerHour * 0.7 + instantaneousRate * 0.3) // Weighted average
+          : instantaneousRate
+      }
+    }
+    ipData.lastSessionCreated = now
+  }
+
+  // 1. Check for sensitive path access
+  if (SENSITIVE_PATHS.some(sensitivePath => path.includes(sensitivePath))) {
+    sessionData.suspiciousPathHits++
+    scoreFactors.SENSITIVE_PATH = addFactor(
+      'SENSITIVE_PATH',
+      BEHAVIOR_WEIGHTS.SENSITIVE_PATH,
+      { path, hitCount: sessionData.suspiciousPathHits },
+      `Access to highly sensitive path: ${path}`,
+    )
+  }
+
+  // 2. Check for rapid requests with adaptive rate limiting
+  const oneMinuteAgo = now - 60000
+  const requestsLastMinute = sessionData.lastRequests.filter(req => req.timestamp > oneMinuteAgo).length
+  const adaptiveRateLimit = calculateRateLimit(sessionData)
+
+  if (requestsLastMinute > adaptiveRateLimit) {
+    // Apply score proportional to how much the limit was exceeded
+    const overageRatio = requestsLastMinute / adaptiveRateLimit
+    const weight = Math.min(
+      BEHAVIOR_WEIGHTS.RAPID_REQUESTS * overageRatio,
+      BEHAVIOR_WEIGHTS.RAPID_REQUESTS * 2, // Cap at double the weight
+    )
+    scoreFactors.RAPID_REQUESTS = addFactor(
+      'RAPID_REQUESTS',
+      weight,
+      { requestsLastMinute, rateLimit: adaptiveRateLimit, overageRatio },
+      // (ratio - 1) is the share by which the limit was exceeded
+      `Too many requests: ${requestsLastMinute}/${adaptiveRateLimit} (${Math.round((overageRatio - 1) * 100)}% over limit)`,
+    )
+  }
+
+  // 3. Analyze request sequence for natural browsing patterns
+  if (sessionData.lastRequests.length >= 5) {
+    const sequenceAnalysis = analyzeRequestSequence(sessionData.lastRequests)
+    sessionData.requestSequenceEntropy = sequenceAnalysis.entropy
+    sequenceTimeConsistency = sequenceAnalysis.timeConsistency
+
+    if (!sequenceAnalysis.isNaturalBrowsing) {
+      // The lower the entropy, the larger the share of the weight applied
+      const weight = BEHAVIOR_WEIGHTS.UNUSUAL_PATTERN
+        * (1 - Math.min(1, sequenceAnalysis.entropy / 2))
+      scoreFactors.UNUSUAL_PATTERN = addFactor(
+        'UNUSUAL_PATTERN',
+        weight,
+        { entropy: sequenceAnalysis.entropy, timeConsistency: sequenceAnalysis.timeConsistency },
+        `Request sequence does not look like natural browsing (entropy: ${sequenceAnalysis.entropy})`,
+      )
+    }
+    else {
+      // Reduce score for natural browsing patterns - positive reinforcement
+      sessionData.score = Math.max(0, sessionData.score - 5)
+      sessionData.knownGoodActions += 1
+    }
+  }
+
+  // 4. Check for session anomaly
+  const sessionAnomaly = detectSessionAnomaly(ipData, sessionData, timestamp)
+  if (sessionAnomaly.suspicious) {
+    const weight = Math.min(
+      BEHAVIOR_WEIGHTS.SESSION_ANOMALY,
+      sessionAnomaly.severity,
+    )
+    scoreFactors.SESSION_ANOMALY = addFactor(
+      'SESSION_ANOMALY',
+      weight,
+      { reason: sessionAnomaly.reason, severity: sessionAnomaly.severity },
+      `Session anomaly detected: ${sessionAnomaly.reason}`,
+    )
+  }
+
+  // 5. Check request timing consistency
+  // Bots often have very consistent intervals between requests
+  if (sessionData.lastRequests.length > 5) {
+    // Only analyze the existing requests, not including the one just added
+    // This prevents the new request from breaking the pattern analysis
+    const existingRequests = sessionData.lastRequests.slice(0, -1)
+
+    // Keep only positive gaps (the first request of a session has a gap of 0)
+    const intervals: number[] = []
+    for (const req of existingRequests) {
+      const gap = req?.timeSincePrevious
+      if (gap && gap > 0) {
+        intervals.push(gap)
+      }
+    }
+
+    // Only proceed if we have enough intervals to analyze
+    if (intervals.length >= 4) {
+      // Calculate mean and standard deviation
+      const mean = intervals.reduce((sum, val) => sum + val, 0) / intervals.length
+      const variance = intervals.reduce((sum, val) => sum + (val - mean) ** 2, 0) / intervals.length
+      const stdDev = Math.sqrt(variance)
+
+      // Very low relative deviation indicates suspiciously consistent timing.
+      // mean > 0 here because only positive gaps were collected.
+      const coefficientOfVariation = stdDev / mean
+
+      let timingWeight = 0
+      let timingTier = ''
+
+      // Extremely precise timing (practically impossible for humans)
+      if (coefficientOfVariation < 0.05 && intervals.length >= 5) {
+        timingWeight = BEHAVIOR_WEIGHTS.RESOURCE_TIMING * 3 // Triple the weight
+        timingTier = 'extremely consistent'
+
+        // If very fast as well (sub-second), even more suspicious
+        if (mean < 1000) {
+          timingWeight += 15 // Additional penalty for inhuman speed
+        }
+      }
+      // Very suspicious timing
+      else if (coefficientOfVariation < 0.1 && mean < 2000) {
+        timingWeight = BEHAVIOR_WEIGHTS.RESOURCE_TIMING * 2 // Double the weight
+        timingTier = 'very consistent'
+      }
+      // Somewhat suspicious timing
+      else if (coefficientOfVariation < 0.2 && mean < 3000) {
+        timingWeight = BEHAVIOR_WEIGHTS.RESOURCE_TIMING
+        timingTier = 'somewhat consistent'
+      }
+
+      if (timingTier !== '') {
+        scoreFactors.RESOURCE_TIMING = addFactor(
+          'RESOURCE_TIMING',
+          timingWeight,
+          { coefficientOfVariation, meanInterval: mean, intervalCount: intervals.length },
+          `Request intervals are ${timingTier} (mean ${Math.round(mean)}ms)`,
+        )
+      }
+
+      // Update average time between requests for future analysis
+      sessionData.averageTimeBetweenRequests = mean
+    }
+  }
+
+  // Add up all score factors
+  const additionalScore = Object.values(scoreFactors).reduce((sum, val) => sum + val, 0)
+  sessionData.score += additionalScore
+
+  // Update traffic type classification based on score
+  if (sessionData.score >= BOT_SCORE_THRESHOLDS.DEFINITELY_BOT) {
+    sessionData.trafficType = TrafficType.MALICIOUS_BOT
+  }
+  else if (
+    sessionData.score >= BOT_SCORE_THRESHOLDS.LIKELY_BOT
+    || sessionData.score >= BOT_SCORE_THRESHOLDS.SUSPICIOUS
+  ) {
+    sessionData.trafficType = TrafficType.SUSPICIOUS
+  }
+  else {
+    sessionData.trafficType = TrafficType.REGULAR_USER
+  }
+
+  // Cap score at 100
+  sessionData.score = Math.min(100, sessionData.score)
+
+  // Update IP score based on session score with memory effect
+  // This allows the IP to be marked as suspicious based on behavior across multiple sessions
+  ipData.suspiciousScore = Math.max(
+    ipData.suspiciousScore * 0.9, // Decay previous score
+    sessionData.score * 0.8, // Influence from current session
+  )
+
+  // Increment legitimate session count if this seems to be a real user
+  if (sessionData.score <= BOT_SCORE_THRESHOLDS.PROBABLY_HUMAN
+    && sessionData.knownGoodActions >= 3) {
+    ipData.legitSessionsCount++
+
+    // If an IP has many legitimate sessions, gradually reduce its suspicious score
+    if (ipData.legitSessionsCount > 5 && ipData.suspiciousScore > 0) {
+      ipData.suspiciousScore = Math.max(0, ipData.suspiciousScore - 5)
+    }
+  }
+
+  // Stamp the update time on both records
+  sessionData.lastUpdated = now
+  ipData.lastUpdated = now
+
+  behavior.ip.isBot = sessionData.score >= BOT_SCORE_THRESHOLDS.LIKELY_BOT
+  behavior.ip.isBotConfidence = (sessionData.score + ipData.suspiciousScore) / 2
+
+  behavior.session = sessionData
+  behavior.ip = ipData
+
+  // Add debug information if requested
+  if (debug) {
+    const sessionAge = now - sessionData.firstSeenAt
+    const avgInterval = sessionData.lastRequests.length > 1
+      ? sessionData.lastRequests.reduce((sum, req, i) => {
+        if (i === 0)
+          return 0
+        return sum + (req.timestamp - sessionData.lastRequests[i - 1].timestamp)
+      }, 0) / (sessionData.lastRequests.length - 1)
+      : 0
+
+    behavior.debug = {
+      sessionAge,
+      requestCount: sessionData.lastRequests.length,
+      pathHistory: sessionData.lastRequests.map(r => r.path),
+      timingAnalysis: {
+        avgInterval,
+        // Defaults to 1 when the sequence analysis did not run for this request
+        consistency: sequenceTimeConsistency ?? 1,
+        entropy: sessionData.requestSequenceEntropy,
+      },
+      factors: detectionFactors,
+      ipInfo: {
+        sessionCount: ipData.sessionCount,
+        totalScore: ipData.suspiciousScore,
+        isBlocked: false, // TODO: get from IP checking
+        isTrusted: false, // TODO: get from IP checking
+      },
+      confidence: behavior.ip.isBotConfidence || 0,
+      reasoning,
+    }
+  }
+}
+
+// Enhanced bot detection with improved behavior analysis
+
+/**
+ * Updates the bot score once the response status of the latest request is
+ * known.
+ *
+ * - Stores `status` on the most recent entry of the session's request history.
+ * - Status >= 400: increments `errorCount`; from the third error on adds a
+ *   progressive penalty, plus a further penalty (and a behavior change
+ *   point) when at least 3 of the last 5 requests were errors. A 404 adds a
+ *   small extra penalty. The score is capped at 100 and `trafficType` may be
+ *   escalated.
+ * - 2xx status: lowers the score by 1 (not below 0) and adds 0.5 to
+ *   `knownGoodActions`.
+ * - When the score moved by more than 10 since `lastScore`, `lastScore` is
+ *   refreshed and a session that jumped from below SUSPICIOUS to at least
+ *   LIKELY_BOT raises the IP's `suspiciousScore` immediately.
+ *
+ * @param status - HTTP status code of the completed response.
+ * @param behavior - Session/IP state to update in place.
+ * @param timestamp - Time recorded for behavior change points, in ms;
+ * defaults to `Date.now()` so existing callers are unaffected.
+ */
+export function applyBehaviorForErrorPages(
+  status: number,
+  behavior: BotDetectionBehavior,
+  timestamp: number = Date.now(),
+) {
+  const sessionData = behavior.session
+
+  // Update the last request with the status code
+  if (sessionData.lastRequests.length > 0) {
+    sessionData.lastRequests[sessionData.lastRequests.length - 1].status = status
+  }
+
+  // Count errors (404s, 403s, etc.)
+  if (status >= 400) {
+    sessionData.errorCount++
+
+    // Add score for repeated errors with progressive penalty
+    if (sessionData.errorCount > 2) {
+      // Penalty grows by a fifth of the weight per error beyond the second,
+      // up to the full weight
+      const errorPenalty = Math.min(
+        BEHAVIOR_WEIGHTS.REPEATED_ERRORS,
+        BEHAVIOR_WEIGHTS.REPEATED_ERRORS * (sessionData.errorCount - 2) / 5,
+      )
+      sessionData.score += errorPenalty
+
+      // Count errors among the last five requests (they need not be adjacent)
+      const recentRequests = sessionData.lastRequests.slice(-5)
+      const recentErrorCount = recentRequests.filter(req => req.status && req.status >= 400).length
+
+      if (recentErrorCount >= 3) {
+        // Strong bot signal: most of the recent requests failed
+        sessionData.score += BEHAVIOR_WEIGHTS.REPEATED_ERRORS
+
+        // Record this moment as a behavior change point
+        if (!sessionData.behaviorChangePoints) {
+          sessionData.behaviorChangePoints = []
+        }
+        sessionData.behaviorChangePoints.push(timestamp)
+      }
+    }
+
+    // 404s might indicate scanning for vulnerabilities
+    if (status === 404) {
+      // Apply smaller penalty for resource 404s
+      sessionData.score += BEHAVIOR_WEIGHTS.NONEXISTENT_RESOURCES * 0.3
+    }
+
+    // Cap score at 100
+    sessionData.score = Math.min(100, sessionData.score)
+
+    // Escalate the traffic type if needed (this path never downgrades it)
+    if (sessionData.score >= BOT_SCORE_THRESHOLDS.DEFINITELY_BOT) {
+      sessionData.trafficType = TrafficType.MALICIOUS_BOT
+    }
+    else if (sessionData.score >= BOT_SCORE_THRESHOLDS.LIKELY_BOT) {
+      sessionData.trafficType = TrafficType.SUSPICIOUS
+    }
+  }
+  else if (status >= 200 && status < 300) {
+    // Successful requests may indicate legitimate use:
+    // slightly reduce the score and credit half a good action
+    sessionData.score = Math.max(0, sessionData.score - 1)
+    sessionData.knownGoodActions += 0.5
+  }
+
+  // Update IP storage if the score changed significantly
+  if (Math.abs(sessionData.score - (sessionData.lastScore || 0)) > 10) {
+    if (behavior.ip) {
+      // If this session suddenly became very suspicious, update IP score immediately
+      if (sessionData.score >= BOT_SCORE_THRESHOLDS.LIKELY_BOT
+        && (sessionData.lastScore || 0) < BOT_SCORE_THRESHOLDS.SUSPICIOUS) {
+        behavior.ip.suspiciousScore = Math.max(behavior.ip.suspiciousScore, sessionData.score * 0.8)
+      }
+    }
+
+    // Remember the last score for future comparisons
+    sessionData.lastScore = sessionData.score
+  }
+}
diff --git a/libs/is-bot/src/behaviors/README.md b/libs/is-bot/src/behaviors/README.md
new file mode 100644
index 00000000..bac10023
--- /dev/null
+++ b/libs/is-bot/src/behaviors/README.md
@@ -0,0 +1,174 @@
+# Bot Detection Behaviors
+
+This directory contains modular bot detection behaviors that can be enabled/disabled independently. Each behavior is categorized by complexity and reliability.
+
+## 🟢 Simple Behaviors (Recommended for Production)
+
+### Path Analysis (`path-analysis.ts`)
+- **What it does**: Checks for access to sensitive paths like `/wp-admin`, `/.env`, `/admin`
+- **Reliability**: Very High
+- **Complexity**: Low
+- **False Positives**: Very Low
+- **Recommendation**: ✅ Always enable
+
+### Basic Timing (`timing-analysis.ts` - `analyzeBasicTiming`)
+- **What it does**: Detects robotic timing patterns (overly consistent intervals between requests)
+- **Reliability**: High
+- **Complexity**: Low
+- **False Positives**: Low
+- **Recommendation**: ✅ Enable for most sites
+
+### Basic Rate Limiting (`rate-limiting.ts` - `analyzeBasicRateLimit`)
+- **What it does**: Simple request rate checking with fixed thresholds
+- **Reliability**: High
+- **Complexity**: Low
+- **False Positives**: Low
+- **Recommendation**: ✅ Enable with appropriate thresholds
+
+### Basic User Agent (`user-agent-analysis.ts` - `analyzeBasicUserAgent`)
+- **What it does**: Checks for missing/suspicious user agents and bot signatures
+- **Reliability**: High
+- **Complexity**: Low
+- **False Positives**: Very Low
+- **Recommendation**: ✅ Always enable
+
+### Simple Patterns (`intent-analysis.ts` - `analyzeSimplePatterns`)
+- **What it does**: Detects obvious scanning patterns and sequential ID enumeration
+- **Reliability**: High
+- **Complexity**: Low
+- **False Positives**: Low
+- **Recommendation**: ✅ Enable for most sites
+
+### Basic Positive Signals (`positive-signals.ts` - `analyzeBasicPositiveSignals`)
+- **What it does**: Rewards search engine referrers, reasonable timing, auth sessions
+- **Reliability**: High
+- **Complexity**: Low
+- **False Positives**: Very Low
+- **Recommendation**: ✅ Always enable
+
+## 🟡 Intermediate Behaviors (Use with Caution)
+
+### Burst Detection (`rate-limiting.ts` - `analyzeBurstPattern`)
+- **What it does**: Detects sudden spikes in request activity
+- **Reliability**: Medium
+- **Complexity**: Medium
+- **False Positives**: Medium (can trigger during legitimate browsing spikes)
+- **Recommendation**: ⚠️ Test thoroughly before production
+
+### Header Consistency (`user-agent-analysis.ts` - `analyzeHeaderConsistency`)
+- **What it does**: Checks for missing/inconsistent browser headers
+- **Reliability**: Medium
+- **Complexity**: Medium
+- **False Positives**: Medium (some legitimate tools have minimal headers)
+- **Recommendation**: ⚠️ Consider for high-security environments
+
+### Contextual Rate Limiting (`rate-limiting.ts` - `analyzeContextualRateLimit`)
+- **What it does**: Adaptive rate limits based on user context and intent
+- **Reliability**: Medium
+- **Complexity**: High
+- **False Positives**: Medium
+- **Recommendation**: ⚠️ Requires careful tuning
+
+## 🔴 Advanced Behaviors (Experimental - High Risk)
+
+### Advanced Timing (`timing-analysis.ts` - `analyzeAdvancedTiming`)
+- **What it does**: Complex timing pattern analysis including periodic and mathematical progressions
+- **Reliability**: Low-Medium
+- **Complexity**: Very High
+- **False Positives**: High (complex timing can have false patterns)
+- **Recommendation**: ❌ Not recommended for production
+
+### Advanced Intent (`intent-analysis.ts` - `analyzeAdvancedIntent`)
+- **What it does**: Complex behavioral analysis for navigation patterns and diversity
+- **Reliability**: Low-Medium
+- **Complexity**: Very High
+- **False Positives**: High
+- **Recommendation**: ❌ Experimental only
+
+### Browser Fingerprinting (`user-agent-analysis.ts` - `analyzeBrowserFingerprint`)
+- **What it does**: Complex browser entropy and header order analysis
+- **Reliability**: Low
+- **Complexity**: Very High
+- **False Positives**: Very High
+- **Recommendation**: ❌ Not suitable for production
+
+### Advanced Positive Signals (`positive-signals.ts` - `analyzeAdvancedPositiveSignals`)
+- **What it does**: Complex credibility building and behavioral learning
+- **Reliability**: Medium
+- **Complexity**: High
+- **False Positives**: Medium
+- **Recommendation**: ⚠️ Requires significant testing
+
+### Behavioral Credibility (`positive-signals.ts` - `analyzeBehavioralCredibility`)
+- **What it does**: ML-like behavioral scoring with multiple factors
+- **Reliability**: Low-Medium
+- **Complexity**: Very High
+- **False Positives**: High
+- **Recommendation**: ❌ Research/experimental only
+
+## Configuration Example
+
+```typescript
+import { setBehaviorConfig } from './modular-analyzer'
+
+// Conservative production config
+setBehaviorConfig({
+ simple: {
+ pathAnalysis: { enabled: true, weight: 1.0 },
+ basicTiming: { enabled: true, weight: 0.8 },
+ basicRateLimit: { enabled: true, weight: 1.0 },
+ basicUserAgent: { enabled: true, weight: 1.0 },
+ simplePatterns: { enabled: true, weight: 1.0 },
+ basicPositiveSignals: { enabled: true, weight: 1.0 }
+ },
+ intermediate: {
+ burstDetection: { enabled: false, weight: 0.8 },
+ headerConsistency: { enabled: false, weight: 0.7 },
+ contextualRateLimit: { enabled: false, weight: 0.9 }
+ },
+ advanced: {
+ // All disabled for production
+ advancedTiming: { enabled: false, weight: 0.6 },
+ advancedIntent: { enabled: false, weight: 0.5 },
+ browserFingerprint: { enabled: false, weight: 0.4 },
+ advancedPositiveSignals: { enabled: false, weight: 0.6 },
+ behavioralCredibility: { enabled: false, weight: 0.3 }
+ }
+})
+```
+
+## Recommendations by Site Type
+
+### **E-commerce / High Traffic**
+- Enable: All simple behaviors
+- Consider: Burst detection (with higher thresholds)
+- Avoid: All advanced behaviors
+
+### **Content Sites / Blogs**
+- Enable: All simple behaviors except aggressive rate limiting
+- Consider: Header consistency for comment spam
+- Avoid: Complex timing analysis
+
+### **APIs / Developer Tools**
+- Enable: Path analysis, basic user agent, simple patterns
+- Consider: Contextual rate limiting
+- Avoid: Timing analysis (legitimate tools vary)
+
+### **High Security / Admin Panels**
+- Enable: All simple + intermediate behaviors
+- Consider: Advanced positive signals for known users
+- Monitor: All behaviors in non-blocking mode first
+
+## Testing Strategy
+
+1. **Start Simple**: Enable only green behaviors initially
+2. **Monitor**: Use debug mode to see behavior outputs
+3. **Gradual Addition**: Add one intermediate behavior at a time
+4. **A/B Test**: Compare detection rates and false positives
+5. **Never in Production**: Don't enable red behaviors in production
+
+## Performance Notes
+
+- Simple behaviors: Minimal performance impact
+- Intermediate behaviors: Slight performance impact
+- Advanced behaviors: Significant performance impact and maintenance overhead
diff --git a/libs/is-bot/src/behaviors/index.ts b/libs/is-bot/src/behaviors/index.ts
new file mode 100644
index 00000000..9544f6c6
--- /dev/null
+++ b/libs/is-bot/src/behaviors/index.ts
@@ -0,0 +1,73 @@
+// Modular bot detection behaviors
+// Each behavior can be enabled/disabled and configured independently
+
+export * from './intent-analysis'
+export * from './path-analysis'
+export * from './positive-signals'
+export * from './rate-limiting'
+export * from './timing-analysis'
+export * from './user-agent-analysis'
+
+// Behavior categories by complexity and reliability.
+// Each table maps a behavior's config key (the same keys appear in
+// DEFAULT_BEHAVIOR_CONFIG) to the name of its analyzer function.
+export const SIMPLE_BEHAVIORS = {
+ // High reliability, low complexity - recommended for production
+ pathAnalysis: 'analyzePathAccess',
+ basicTiming: 'analyzeBasicTiming',
+ basicRateLimit: 'analyzeBasicRateLimit',
+ basicUserAgent: 'analyzeBasicUserAgent',
+ simplePatterns: 'analyzeSimplePatterns',
+ basicPositiveSignals: 'analyzeBasicPositiveSignals',
+} as const
+
+// Config key -> analyzer function name for the intermediate tier
+export const INTERMEDIATE_BEHAVIORS = {
+ // Medium complexity, good reliability - use with caution
+ burstDetection: 'analyzeBurstPattern',
+ headerConsistency: 'analyzeHeaderConsistency',
+ contextualRateLimit: 'analyzeContextualRateLimit',
+} as const
+
+// Config key -> analyzer function name for the advanced tier
+export const ADVANCED_BEHAVIORS = {
+ // High complexity, higher error rate - experimental
+ advancedTiming: 'analyzeAdvancedTiming',
+ advancedIntent: 'analyzeAdvancedIntent',
+ browserFingerprint: 'analyzeBrowserFingerprint',
+ advancedPositiveSignals: 'analyzeAdvancedPositiveSignals',
+ behavioralCredibility: 'analyzeBehavioralCredibility',
+} as const
+
+// Configuration for a single detection behavior
+export interface BehaviorConfig {
+ // Whether the behavior is switched on
+ enabled: boolean
+ weight: number // Multiplier for the behavior's score
+ threshold?: number // Custom threshold for this behavior
+}
+
+/**
+ * Complete behavior configuration, grouped by tier. Within each tier the
+ * settings are keyed by behavior name (e.g. `pathAnalysis`, `burstDetection`).
+ */
+export interface BotDetectionBehaviorConfig {
+  // `Record` needs both type arguments: behavior name -> its settings
+  simple: Record<string, BehaviorConfig>
+  intermediate: Record<string, BehaviorConfig>
+  advanced: Record<string, BehaviorConfig>
+}
+
+// Default configuration - only simple behaviors enabled.
+// Intermediate and advanced behaviors ship disabled and with weights below 1.0.
+export const DEFAULT_BEHAVIOR_CONFIG: BotDetectionBehaviorConfig = {
+ simple: {
+ pathAnalysis: { enabled: true, weight: 1.0 },
+ basicTiming: { enabled: true, weight: 1.0 },
+ basicRateLimit: { enabled: true, weight: 1.0 },
+ basicUserAgent: { enabled: true, weight: 1.0 },
+ simplePatterns: { enabled: true, weight: 1.0 },
+ basicPositiveSignals: { enabled: true, weight: 1.0 },
+ },
+ intermediate: {
+ burstDetection: { enabled: false, weight: 0.8 },
+ headerConsistency: { enabled: false, weight: 0.7 },
+ contextualRateLimit: { enabled: false, weight: 0.9 },
+ },
+ advanced: {
+ advancedTiming: { enabled: false, weight: 0.6 },
+ advancedIntent: { enabled: false, weight: 0.5 },
+ browserFingerprint: { enabled: false, weight: 0.4 },
+ advancedPositiveSignals: { enabled: false, weight: 0.6 },
+ behavioralCredibility: { enabled: false, weight: 0.3 },
+ },
+}
diff --git a/libs/is-bot/src/behaviors/intent-analysis.ts b/libs/is-bot/src/behaviors/intent-analysis.ts
new file mode 100644
index 00000000..1a9c1f4d
--- /dev/null
+++ b/libs/is-bot/src/behaviors/intent-analysis.ts
@@ -0,0 +1,183 @@
+// User intent analysis behavior
+import type { SessionData } from '../behavior'
+
+/**
+ * Flags the two most blatant automation signatures in a session's request history:
+ * a majority of requests hitting scanner-style paths, or numeric ids that form a
+ * strictly consecutive run once sorted.
+ *
+ * @param sessionData - Session whose `lastRequests` paths are inspected.
+ * @returns Score 35 for scanning, 40 for sequential ids, otherwise 0, each with a matching reason.
+ */
+export function analyzeSimplePatterns(sessionData: SessionData): { score: number, reason: string } {
+  const requestPaths = sessionData.lastRequests.map(request => request.path)
+
+  // Fewer than five requests is too little history to judge
+  if (requestPaths.length < 5) {
+    return { score: 0, reason: 'insufficient-data' }
+  }
+
+  // Path fragments that show up in obvious scanning
+  const scannerSignatures = [
+    /\/admin/,
+    /\/wp-admin/,
+    /\/login/,
+    /\.php$/,
+    /\.asp$/,
+    /config/,
+    /backup/,
+  ]
+
+  // Each request counts at most once, however many signatures it matches
+  const scanningHits = requestPaths
+    .filter(path => scannerSignatures.some(signature => signature.test(path)))
+    .length
+
+  // More than half of the requests look like probing
+  if (scanningHits / requestPaths.length > 0.5) {
+    return { score: 35, reason: `scanning-pattern: ${scanningHits}/${requestPaths.length} hits` }
+  }
+
+  // Id scanning: take the first "/<digits>" segment of every path that has one
+  const ids: number[] = []
+  for (const path of requestPaths) {
+    const match = path.match(/\/(\d+)/)
+    if (match) {
+      ids.push(Number.parseInt(match[1]))
+    }
+  }
+
+  if (ids.length >= 3) {
+    ids.sort((a, b) => a - b)
+    // Consecutive means every id is exactly one more than its predecessor
+    const isConsecutive = ids.every((id, index) => index === 0 || id === ids[index - 1] + 1)
+    if (isConsecutive) {
+      return { score: 40, reason: 'sequential-id-scanning' }
+    }
+  }
+
+  return { score: 0, reason: 'normal-patterns' }
+}
+
+/**
+ * Runs the navigation, path-diversity and error-pattern heuristics in that order
+ * and reports the first one that considers the session suspicious.
+ * High complexity, higher chance of false positives.
+ *
+ * @param sessionData - Session whose `lastRequests` history is analyzed.
+ * @returns The score and reason of the first suspicious heuristic, or score 0.
+ */
+export function analyzeAdvancedIntent(sessionData: SessionData): { score: number, reason: string } {
+  const history = sessionData.lastRequests
+  if (history.length < 8) {
+    return { score: 0, reason: 'insufficient-data' }
+  }
+
+  // The path-based heuristics only look at the ten most recent paths
+  const recentPaths = history.map(request => request.path).slice(-10)
+
+  // Deferred so that later heuristics are skipped once one reports a finding
+  const heuristics = [
+    () => analyzeNavigationPatterns(recentPaths),
+    () => analyzePathDiversity(recentPaths),
+    () => analyzeErrorPatterns(history),
+  ]
+
+  for (const runHeuristic of heuristics) {
+    const { suspicious, score, reason } = runHeuristic()
+    if (suspicious) {
+      return { score, reason }
+    }
+  }
+
+  return { score: 0, reason: 'normal-advanced-intent' }
+}
+
+/**
+ * Looks for navigation that lacks a logical flow and is either almost entirely
+ * unique (random) or visited in sorted order (alphabetical scanning).
+ *
+ * @param paths - Request paths in the order they were visited.
+ * @returns `suspicious` with a score of 25 (random) or 30 (alphabetical), otherwise not suspicious.
+ */
+function analyzeNavigationPatterns(paths: string[]): { suspicious: boolean, score: number, reason: string } {
+  // A logical flow rules out both patterns below
+  if (checkLogicalFlow(paths)) {
+    return { suspicious: false, score: 0, reason: 'normal-navigation' }
+  }
+
+  // Check if it's random or systematic
+  const distinctCount = new Set(paths).size
+  if (distinctCount / paths.length > 0.9) {
+    return { suspicious: true, score: 25, reason: 'random-navigation-pattern' }
+  }
+
+  // Check for alphabetical ordering.
+  // Compare element by element: joining into one string is ambiguous
+  // (['/a/a', '/a'] and ['/a', '/a/a'] join to the same text).
+  // An empty list or a single repeated path is trivially "sorted" and is not scanning.
+  const sorted = [...paths].sort()
+  const isAlphabetical = distinctCount > 1 && paths.every((path, index) => path === sorted[index])
+  if (isAlphabetical) {
+    return { suspicious: true, score: 30, reason: 'alphabetical-scanning' }
+  }
+
+  return { suspicious: false, score: 0, reason: 'normal-navigation' }
+}
+
+/**
+ * Measures how widely or how narrowly the visited paths are spread.
+ *
+ * @param paths - Request paths to inspect.
+ * @returns Suspicious with score 20 when the paths span too many top-level sections,
+ * score 15 when every path shares the same first two segments, otherwise not suspicious.
+ */
+function analyzePathDiversity(paths: string[]): { suspicious: boolean, score: number, reason: string } {
+  // Top-level section of every path, e.g. '/blog/post' -> '/blog'
+  const topLevelSections = new Set<string>()
+  for (const path of paths) {
+    topLevelSections.add(`/${path.split('/')[1]}`)
+  }
+
+  // Too many different sections too quickly
+  const spansTooManySections = paths.length > 5 && topLevelSections.size > paths.length * 0.7
+  if (spansTooManySections) {
+    return { suspicious: true, score: 20, reason: 'excessive-path-diversity' }
+  }
+
+  // Every request sharing one prefix (leading slash plus up to two segments)
+  const sharedPrefixes = new Set(paths.map(path => path.split('/', 3).join('/')))
+  const staysOnOnePrefix = paths.length > 6 && sharedPrefixes.size === 1
+  if (staysOnOnePrefix) {
+    return { suspicious: true, score: 15, reason: 'narrow-path-focus' }
+  }
+
+  return { suspicious: false, score: 0, reason: 'normal-diversity' }
+}
+
+/**
+ * Judges the share of error responses among the ten most recent requests.
+ *
+ * @param requests - Request history; `status` may be absent on entries.
+ * @returns Suspicious with score 25 when more than 60% of recent requests failed,
+ * score 10 when more than eight recent requests all hit distinct paths without any error,
+ * otherwise not suspicious.
+ */
+function analyzeErrorPatterns(requests: Array<{ path: string, status?: number }>): { suspicious: boolean, score: number, reason: string } {
+  const recent = requests.slice(-10)
+
+  // Entries without a status are not counted as errors
+  let failures = 0
+  for (const request of recent) {
+    if (request.status && request.status >= 400) {
+      failures++
+    }
+  }
+
+  // Too many errors suggests probing
+  if (failures > recent.length * 0.6) {
+    return { suspicious: true, score: 25, reason: 'excessive-error-generation' }
+  }
+
+  // No errors at all can also be suspicious for exploration
+  const everyPathDistinct = new Set(recent.map(request => request.path)).size === recent.length
+  if (failures === 0 && recent.length > 8 && everyPathDistinct) {
+    return { suspicious: true, score: 10, reason: 'error-free-exploration' }
+  }
+
+  return { suspicious: false, score: 0, reason: 'normal-error-pattern' }
+}
+
+/**
+ * Very simple check for logical navigation.
+ *
+ * @param paths - Request paths to inspect.
+ * @returns True when the paths include both the home page and a deep path, or when
+ * they cover at least two of the levels home / main section / subsection.
+ */
+function checkLogicalFlow(paths: string[]): boolean {
+  const visitedHome = paths.some(path => path === '/' || path === '')
+  const wentDeep = paths.some(path => path.split('/').length > 3)
+
+  // Basic logical flow: start at home or main sections, then go deeper
+  if (visitedHome && wentDeep) {
+    return true
+  }
+
+  // One pattern per navigation level
+  const levelPatterns = [
+    /^\/$/, // Home
+    /^\/[^/]+$/, // Main section
+    /^\/[^/]+\/[^/]+$/, // Subsection
+  ]
+
+  let levelsSeen = 0
+  for (const levelPattern of levelPatterns) {
+    if (paths.some(path => levelPattern.test(path))) {
+      levelsSeen++
+    }
+  }
+
+  // If matches multiple levels, consider it logical
+  return levelsSeen >= 2
+}
diff --git a/libs/is-bot/src/behaviors/path-analysis.ts b/libs/is-bot/src/behaviors/path-analysis.ts
new file mode 100644
index 00000000..851ec8a2
--- /dev/null
+++ b/libs/is-bot/src/behaviors/path-analysis.ts
@@ -0,0 +1,79 @@
+// Path-based bot detection behavior
+import type { H3Event } from 'h3'
+import type { ImprovedDetectionContext, SiteProfile } from '../improved-behavior'
+import { getResponseStatus } from 'h3'
+
+/**
+ * Scores a single request path against fixed lists of sensitive locations.
+ * Low complexity, high reliability.
+ *
+ * @param path - Request path to classify.
+ * @param _context - Unused.
+ * @returns Score 40 for a high-risk match, 15 for a medium-risk match, otherwise 0.
+ */
+export function analyzePathAccess(
+  path: string,
+  _context: ImprovedDetectionContext,
+): { score: number, reason: string } {
+  // Ordered from most to least severe; the first tier with a match decides the result
+  const riskTiers = [
+    {
+      label: 'high-risk-path',
+      score: 40,
+      patterns: [
+        /wp-config\.php/,
+        /\.env$/,
+        /phpmyadmin/,
+        /admin\.php/,
+        /wp-login\.php/,
+      ],
+    },
+    {
+      label: 'medium-risk-path',
+      score: 15,
+      patterns: [
+        /\/admin$/,
+        /\/login$/,
+        /\/dashboard$/,
+        /\/config$/,
+      ],
+    },
+  ]
+
+  for (const tier of riskTiers) {
+    if (tier.patterns.some(pattern => pattern.test(path))) {
+      return { score: tier.score, reason: `${tier.label}: ${path}` }
+    }
+  }
+
+  return { score: 0, reason: 'normal-path' }
+}
+
+/**
+ * Records the current request in the site profile when its response status is 2xx.
+ *
+ * @param event - H3 event supplying the request path and response status.
+ * @param existingProfile - Profile to update in place; a blank one is created when omitted.
+ * @returns The updated (or newly created) profile.
+ */
+export function buildBasicSiteProfile(event: H3Event, existingProfile?: SiteProfile): SiteProfile {
+  const profile = existingProfile || {
+    detectedCMS: 'unknown',
+    hasAdminArea: false,
+    adminPaths: [],
+    apiEndpoints: [],
+    existingPaths: new Set(),
+    userAgentPatterns: new Map(),
+    legitimateAccessPatterns: [],
+  }
+
+  const path = event.path || ''
+  const status = getResponseStatus(event)
+
+  // Only track successful responses
+  const isSuccess = status >= 200 && status < 300
+  if (!isSuccess) {
+    return profile
+  }
+
+  profile.existingPaths.add(path)
+
+  // Simple CMS detection; the WordPress markers take precedence over the Nuxt one
+  const looksLikeWordpress = path.includes('/wp-') || path.includes('wp-admin')
+  if (looksLikeWordpress) {
+    profile.detectedCMS = 'wordpress'
+  }
+  else if (path.includes('/_nuxt/')) {
+    profile.detectedCMS = 'nuxt'
+  }
+
+  return profile
+}
diff --git a/libs/is-bot/src/behaviors/positive-signals.ts b/libs/is-bot/src/behaviors/positive-signals.ts
new file mode 100644
index 00000000..d5c17b9e
--- /dev/null
+++ b/libs/is-bot/src/behaviors/positive-signals.ts
@@ -0,0 +1,188 @@
+// Positive signals that indicate legitimate users
+import type { SessionData } from '../behavior'
+import type { ImprovedDetectionContext } from '../improved-behavior'
+
+/**
+ * Simple positive signals - clear indicators of human behavior.
+ * Low complexity, high confidence.
+ *
+ * @param headers - Request headers keyed by lower-case name.
+ * NOTE(review): the type arguments were missing from `Record`; string values that may be
+ * absent match how the body reads them - confirm against the caller.
+ * @param sessionData - Session whose `lastRequests` timestamps are used for the reading-pace check.
+ * @returns A negative score (the sum of all bonuses) when any signal is found, otherwise 0.
+ */
+export function analyzeBasicPositiveSignals(
+  headers: Record<string, string | undefined>,
+  sessionData: SessionData,
+): { score: number, reason: string } {
+  let positiveScore = 0
+  const reasons: string[] = []
+
+  // Legitimate referrer (substring match on the header value)
+  const referrer = headers.referer || headers.referrer || ''
+  if (referrer && (
+    referrer.includes('google.com')
+    || referrer.includes('bing.com')
+    || referrer.includes('duckduckgo.com')
+  )) {
+    positiveScore += 10
+    reasons.push('search-engine-referrer')
+  }
+
+  // Time spent reading (reasonable intervals between requests)
+  if (sessionData.lastRequests.length >= 3) {
+    const intervals: number[] = []
+    for (let i = 1; i < sessionData.lastRequests.length; i++) {
+      const interval = sessionData.lastRequests[i].timestamp - sessionData.lastRequests[i - 1].timestamp
+      intervals.push(interval)
+    }
+
+    const avgInterval = intervals.reduce((sum, val) => sum + val, 0) / intervals.length
+
+    // 5-120 seconds between requests suggests reading
+    if (avgInterval > 5000 && avgInterval < 120000) {
+      positiveScore += 15
+      reasons.push('content-engagement')
+    }
+  }
+
+  // Standard browser headers
+  if (headers['accept-language'] && headers['accept-encoding']) {
+    positiveScore += 5
+    reasons.push('complete-headers')
+  }
+
+  // Authentication cookies (if present)
+  const cookies = headers.cookie || ''
+  if (cookies.includes('session') || cookies.includes('auth') || cookies.includes('login')) {
+    positiveScore += 20
+    reasons.push('authenticated-session')
+  }
+
+  if (positiveScore > 0) {
+    // Positive signals lower the bot score, hence the negation
+    return { score: -positiveScore, reason: `positive-signals: ${reasons.join(', ')}` }
+  }
+
+  return { score: 0, reason: 'no-positive-signals' }
+}
+
+/**
+ * Advanced positive signal analysis.
+ * Higher complexity, may be less reliable.
+ *
+ * @param headers - Request headers keyed by lower-case name.
+ * NOTE(review): the type arguments were missing from `Record`; string values that may be
+ * absent match how the body reads them - confirm against the caller.
+ * @param sessionData - Session supplying `knownGoodActions` and the `lastRequests` history.
+ * @param _context - Unused.
+ * @returns A negative score (the sum of all bonuses) when any signal is found, otherwise 0.
+ */
+export function analyzeAdvancedPositiveSignals(
+  headers: Record<string, string | undefined>,
+  sessionData: SessionData,
+  _context: ImprovedDetectionContext,
+): { score: number, reason: string } {
+  let positiveScore = 0
+  const reasons: string[] = []
+
+  // Credibility building over time (bonus capped at 20)
+  if (sessionData.knownGoodActions > 5) {
+    positiveScore += Math.min(20, sessionData.knownGoodActions * 2)
+    reasons.push(`credibility-score: ${sessionData.knownGoodActions}`)
+  }
+
+  // Consistent user agent over session
+  // NOTE(review): no user agent comparison happens here - the bonus is granted purely
+  // because more than five requests were recorded. Verifying it would require tracking
+  // the user agent per request.
+  if (sessionData.lastRequests.length > 5) {
+    positiveScore += 5
+    reasons.push('consistent-identity')
+  }
+
+  // Natural error patterns (humans make typos)
+  // With an empty history the rate is NaN and neither comparison below holds
+  const recentRequests = sessionData.lastRequests.slice(-10)
+  const errorRate = recentRequests.filter(r => r.status === 404).length / recentRequests.length
+  if (errorRate > 0.05 && errorRate < 0.2) { // 5-20% error rate is human-like
+    positiveScore += 8
+    reasons.push('natural-error-pattern')
+  }
+
+  // Form interactions (if we tracked them)
+  // This would require additional tracking infrastructure
+
+  // Mobile vs desktop patterns
+  const userAgent = headers['user-agent'] || ''
+  if (userAgent.includes('Mobile') || userAgent.includes('iPhone') || userAgent.includes('Android')) {
+    // Mobile users often have different patterns
+    positiveScore += 5
+    reasons.push('mobile-device')
+  }
+
+  // Geographic consistency (would need IP geolocation)
+  // Complex and requires external services
+
+  if (positiveScore > 0) {
+    // Positive signals lower the bot score, hence the negation
+    return { score: -positiveScore, reason: `advanced-positive: ${reasons.join(', ')}` }
+  }
+
+  return { score: 0, reason: 'no-advanced-positive-signals' }
+}
+
+/**
+ * Heuristic credibility scoring: starts from a neutral 50 and adds points for
+ * session age, path variety, irregular timing and recovery after errors.
+ * Very complex, high maintenance overhead.
+ *
+ * @param sessionData - Session supplying `firstSeenAt` and the `lastRequests` history.
+ * @returns The negated, scaled credibility surplus when its magnitude exceeds 2, otherwise score 0.
+ */
+export function analyzeBehavioralCredibility(sessionData: SessionData): { score: number, reason: string } {
+  // This would ideally use a trained model, but for now we'll use heuristics
+  const requests = sessionData.lastRequests
+  const factors: string[] = []
+  let credibilityScore = 50 // Start neutral
+
+  const credit = (points: number, factor: string) => {
+    credibilityScore += points
+    factors.push(factor)
+  }
+
+  // Session age factor: 5+ minutes
+  const fiveMinutes = 5 * 60 * 1000
+  if (Date.now() - sessionData.firstSeenAt > fiveMinutes) {
+    credit(10, 'established-session')
+  }
+
+  // Request variety: sweet spot for humans
+  const varietyRatio = new Set(requests.map(request => request.path)).size / requests.length
+  if (varietyRatio > 0.3 && varietyRatio < 0.8) {
+    credit(5, 'good-path-variety')
+  }
+
+  // Timing variance (humans are inconsistent)
+  if (requests.length >= 5) {
+    // Gap between every request and the one before it
+    const gaps = requests.slice(1).map((request, index) => request.timestamp - requests[index].timestamp)
+    const mean = gaps.reduce((total, gap) => total + gap, 0) / gaps.length
+    const variance = gaps.reduce((total, gap) => total + (gap - mean) ** 2, 0) / gaps.length
+
+    if (Math.sqrt(variance) / mean > 0.3) {
+      credit(8, 'human-timing-variance')
+    }
+  }
+
+  // Error recovery (humans click back, retry, etc.)
+  const failed = requests.filter(request => request.status && request.status >= 400)
+  if (failed.length > 0 && failed.length < requests.length) {
+    // A later request that did not fail counts as recovery
+    const recovered = failed.some(failure =>
+      requests.some(request =>
+        request.timestamp > failure.timestamp && (!request.status || request.status < 400),
+      ),
+    )
+
+    if (recovered) {
+      credit(10, 'error-recovery-behavior')
+    }
+  }
+
+  // Convert credibility score to bot detection score, scaled to a reasonable range
+  const adjustment = (credibilityScore - 50) * 0.3
+  if (Math.abs(adjustment) > 2) {
+    return {
+      score: -adjustment,
+      reason: `behavioral-credibility: ${credibilityScore}/100 (${factors.join(', ')})`,
+    }
+  }
+
+  return { score: 0, reason: 'neutral-credibility' }
+}
diff --git a/libs/is-bot/src/behaviors/rate-limiting.ts b/libs/is-bot/src/behaviors/rate-limiting.ts
new file mode 100644
index 00000000..682d0bd8
--- /dev/null
+++ b/libs/is-bot/src/behaviors/rate-limiting.ts
@@ -0,0 +1,102 @@
+// Rate limiting bot detection behavior
+import type { SessionData } from '../behavior'
+import type { ImprovedDetectionContext } from '../improved-behavior'
+
+/**
+ * Counts the requests of the last 60 seconds and maps the count onto fixed tiers.
+ * Low complexity, high reliability.
+ *
+ * @param sessionData - Session whose `lastRequests` timestamps are counted.
+ * @returns Score 50 above 30 requests, 25 above 20, 10 above 15, otherwise 0.
+ */
+export function analyzeBasicRateLimit(sessionData: SessionData): { score: number, reason: string } {
+  const windowStart = Date.now() - 60000
+  const requestsLastMinute = sessionData.lastRequests
+    .filter(request => request.timestamp > windowStart)
+    .length
+
+  // Fixed thresholds - simple and predictable. Ordered strictest first.
+  const tiers = [
+    { above: 30, score: 50, label: 'excessive-requests' },
+    { above: 20, score: 25, label: 'high-requests' },
+    { above: 15, score: 10, label: 'elevated-requests' },
+  ]
+
+  const matchedTier = tiers.find(tier => requestsLastMinute > tier.above)
+  if (matchedTier) {
+    return { score: matchedTier.score, reason: `${matchedTier.label}: ${requestsLastMinute}/min` }
+  }
+
+  return { score: 0, reason: 'normal-rate' }
+}
+
+/**
+ * Context-aware rate limiting with adaptive thresholds.
+ * Higher complexity, may be error-prone in edge cases.
+ *
+ * The per-minute limit starts at 15, rises to 30 for authenticated sessions, is raised to
+ * at least 25 for browsing intent, is forced down to 5 for scanning intent, and gains 5
+ * for search-engine referrals.
+ *
+ * @param sessionData - Session whose `lastRequests` timestamps are counted over the last 60 seconds.
+ * @param context - Supplies `authenticationStatus`, `userIntent` and `referrerContext`.
+ * @returns Three points per request over the limit (capped at 50), otherwise score 0.
+ */
+export function analyzeContextualRateLimit(
+  sessionData: SessionData,
+  context: ImprovedDetectionContext,
+): { score: number, reason: string } {
+  const now = Date.now()
+  const oneMinuteAgo = now - 60000
+  const requestsLastMinute = sessionData.lastRequests.filter(r => r.timestamp > oneMinuteAgo).length
+
+  // Dynamic rate limits based on context
+  let rateLimit = 15 // Default
+
+  // Adjust based on authentication status
+  if (context.authenticationStatus === 'authenticated') {
+    rateLimit = 30 // Authenticated users get higher limits
+  }
+
+  // Adjust based on user intent
+  if (context.userIntent === 'scanning') {
+    rateLimit = 5 // Very strict for scanners, regardless of authentication
+  }
+  else if (context.userIntent === 'browsing') {
+    // More lenient for browsers, but never below a limit already granted above:
+    // a plain assignment here used to cut authenticated users from 30 down to 25
+    rateLimit = Math.max(rateLimit, 25)
+  }
+
+  // Adjust based on referrer
+  if (context.referrerContext === 'search-engine') {
+    rateLimit += 5 // Slight bonus for search engine referrals
+  }
+
+  if (requestsLastMinute > rateLimit) {
+    const overage = requestsLastMinute - rateLimit
+    const score = Math.min(50, overage * 3)
+    return {
+      score,
+      reason: `contextual-rate-exceeded: ${requestsLastMinute}/${rateLimit} (intent: ${context.userIntent})`,
+    }
+  }
+
+  return { score: 0, reason: 'within-rate-limit' }
+}
+
+/**
+ * Burst detection - looks for sudden spikes in activity within 10s, 30s and 60s windows.
+ * Medium complexity, good for catching automated tools.
+ *
+ * @param sessionData - Session whose `lastRequests` timestamps are counted per window.
+ * @returns Score 30 for the first window holding more than twice its expected maximum, otherwise 0.
+ */
+export function analyzeBurstPattern(sessionData: SessionData): { score: number, reason: string } {
+  const history = sessionData.lastRequests
+  if (history.length < 10) {
+    return { score: 0, reason: 'insufficient-data' }
+  }
+
+  const now = Date.now()
+
+  // 10s, 30s, 1min windows, checked shortest first
+  for (const windowMs of [10000, 30000, 60000]) {
+    const requestsInWindow = history.filter(request => request.timestamp > now - windowMs).length
+
+    // Rough estimate: 1 request per 2 seconds max; a burst is more than double that
+    const burstThreshold = Math.ceil(windowMs / 2000) * 2
+
+    if (requestsInWindow > burstThreshold) {
+      return {
+        score: 30,
+        reason: `burst-detected: ${requestsInWindow} requests in ${windowMs / 1000}s`,
+      }
+    }
+  }
+
+  return { score: 0, reason: 'normal-burst-pattern' }
+}
diff --git a/libs/is-bot/src/behaviors/timing-analysis.ts b/libs/is-bot/src/behaviors/timing-analysis.ts
new file mode 100644
index 00000000..42e5ef22
--- /dev/null
+++ b/libs/is-bot/src/behaviors/timing-analysis.ts
@@ -0,0 +1,104 @@
+// Timing-based bot detection behavior
+import type { SessionData } from '../behavior'
+
+/**
+ * Basic timing consistency check.
+ * Simple and reliable - checks for robotic timing patterns.
+ *
+ * @param sessionData - Session whose `lastRequests` timestamps are compared pairwise.
+ * @returns Score 35 for near-constant fast intervals, 20 for somewhat constant very fast
+ * intervals, otherwise 0.
+ */
+export function analyzeBasicTiming(sessionData: SessionData): { score: number, reason: string } {
+  if (sessionData.lastRequests.length < 5) {
+    return { score: 0, reason: 'insufficient-data' }
+  }
+
+  const intervals: number[] = []
+  for (let i = 1; i < sessionData.lastRequests.length; i++) {
+    const interval = sessionData.lastRequests[i].timestamp - sessionData.lastRequests[i - 1].timestamp
+    intervals.push(interval)
+  }
+
+  const mean = intervals.reduce((sum, val) => sum + val, 0) / intervals.length
+  const variance = intervals.reduce((sum, val) => sum + (val - mean) ** 2, 0) / intervals.length
+  const stdDev = Math.sqrt(variance)
+
+  // A mean of 0 would make the ratio NaN or Infinity, which fails every comparison below
+  // and lets such sessions pass as human. When all timestamps are identical (the most
+  // uniform timing possible) this treats the variation as zero.
+  const coefficientOfVariation = mean === 0 ? 0 : stdDev / mean
+
+  // Very consistent timing is suspicious
+  if (coefficientOfVariation < 0.05 && mean < 2000) {
+    return { score: 35, reason: 'robotic-timing-detected' }
+  }
+
+  // Somewhat consistent timing
+  if (coefficientOfVariation < 0.15 && mean < 1000) {
+    return { score: 20, reason: 'suspicious-timing-pattern' }
+  }
+
+  return { score: 0, reason: 'human-like-timing' }
+}
+
+/**
+ * Advanced timing analysis: periodic intervals first, arithmetic progressions second,
+ * and the basic consistency check as the fallback.
+ * Higher complexity, may have false positives.
+ *
+ * @param sessionData - Session whose `lastRequests` timestamps are analyzed.
+ * @returns Score 40 for a periodic pattern, 30 for a progression, otherwise the basic timing result.
+ */
+export function analyzeAdvancedTiming(sessionData: SessionData): { score: number, reason: string } {
+  const history = sessionData.lastRequests
+  if (history.length < 10) {
+    return { score: 0, reason: 'insufficient-data' }
+  }
+
+  // Gap between every request and the one before it
+  const gaps = history.slice(1).map((request, index) => request.timestamp - history[index].timestamp)
+
+  // Check for periodic patterns (e.g., every 1000ms, 2000ms, etc.)
+  const periodic = checkPeriodicPattern(gaps)
+  if (periodic.detected) {
+    return { score: 40, reason: `periodic-pattern: ${periodic.period}ms` }
+  }
+
+  // Check for mathematical progressions
+  const progression = checkMathematicalProgression(gaps)
+  if (progression.detected) {
+    return { score: 30, reason: `mathematical-progression: ${progression.type}` }
+  }
+
+  return analyzeBasicTiming(sessionData)
+}
+
+/**
+ * Checks whether at least 60% of the intervals sit within 50ms of one common period.
+ *
+ * @param intervals - Gaps between consecutive requests, in milliseconds.
+ * @returns `detected` plus the first matching period, or `detected: false`.
+ */
+function checkPeriodicPattern(intervals: number[]): { detected: boolean, period?: number } {
+  // Without intervals the 60% quota is zero and every period would trivially "match"
+  if (intervals.length === 0) {
+    return { detected: false }
+  }
+
+  const tolerance = 50 // 50ms tolerance
+
+  // Check common periods
+  const commonPeriods = [500, 1000, 1500, 2000, 3000, 5000]
+  const requiredMatches = Math.ceil(intervals.length * 0.6)
+
+  for (const period of commonPeriods) {
+    const matches = intervals.filter(interval =>
+      Math.abs(interval - period) <= tolerance,
+    )
+
+    if (matches.length >= requiredMatches) {
+      return { detected: true, period }
+    }
+  }
+
+  return { detected: false }
+}
+
+/**
+ * Checks whether the intervals grow or shrink by a near-constant step.
+ *
+ * @param intervals - Gaps between consecutive requests, in milliseconds.
+ * @returns `detected` with type 'arithmetic' when the steps deviate by less than 10ms
+ * around a mean step larger than 5ms in magnitude; `detected: false` otherwise or with
+ * fewer than five intervals.
+ */
+function checkMathematicalProgression(intervals: number[]): { detected: boolean, type?: string } {
+  if (intervals.length < 5) {
+    return { detected: false }
+  }
+
+  // Check arithmetic progression: step from every interval to the next one
+  const steps = intervals.slice(1).map((interval, index) => interval - intervals[index])
+
+  const meanStep = steps.reduce((total, step) => total + step, 0) / steps.length
+  const stepVariance = steps.reduce((total, step) => total + (step - meanStep) ** 2, 0) / steps.length
+
+  const isArithmetic = Math.sqrt(stepVariance) < 10 && Math.abs(meanStep) > 5
+  return isArithmetic ? { detected: true, type: 'arithmetic' } : { detected: false }
+}
diff --git a/libs/is-bot/src/behaviors/user-agent-analysis.ts b/libs/is-bot/src/behaviors/user-agent-analysis.ts
new file mode 100644
index 00000000..b5697553
--- /dev/null
+++ b/libs/is-bot/src/behaviors/user-agent-analysis.ts
@@ -0,0 +1,154 @@
+// User agent and header analysis
+import type { SessionData } from '../behavior'
+
+/**
+ * Basic user agent validation.
+ * Simple and reliable.
+ *
+ * @param headers - Request headers keyed by lower-case name.
+ * NOTE(review): the type arguments were missing from `Record`; string values that may be
+ * absent match how the body reads them - confirm against the caller.
+ * @returns Score 30 when the user agent is missing, 25 when it is shorter than 20 characters,
+ * 40 when it contains a known bot signature, otherwise 0.
+ */
+export function analyzeBasicUserAgent(headers: Record<string, string | undefined>): { score: number, reason: string } {
+  const userAgent = headers['user-agent'] || ''
+
+  // Missing user agent
+  if (!userAgent) {
+    return { score: 30, reason: 'missing-user-agent' }
+  }
+
+  // Too short to be real
+  if (userAgent.length < 20) {
+    return { score: 25, reason: 'suspicious-user-agent-length' }
+  }
+
+  // Common bot signatures (case-insensitive substring matches)
+  const botSignatures = [
+    /bot/i,
+    /crawler/i,
+    /spider/i,
+    /scraper/i,
+    /curl/i,
+    /wget/i,
+    /python-requests/i,
+  ]
+
+  if (botSignatures.some(pattern => pattern.test(userAgent))) {
+    return { score: 40, reason: 'bot-signature-detected' }
+  }
+
+  return { score: 0, reason: 'normal-user-agent' }
+}
+
+/**
+ * Advanced header consistency analysis.
+ * Higher complexity, may have false positives.
+ *
+ * @param headers - Request headers keyed by lower-case name.
+ * NOTE(review): the type arguments were missing from `Record`; string values that may be
+ * absent match how the body reads them - confirm against the caller.
+ * @returns Score 35 for three or more issues, 20 for two, 10 for one, otherwise 0;
+ * the reason lists every issue found.
+ */
+export function analyzeHeaderConsistency(headers: Record<string, string | undefined>): { score: number, reason: string } {
+  const userAgent = headers['user-agent'] || ''
+  const acceptLanguage = headers['accept-language'] || ''
+  const acceptEncoding = headers['accept-encoding'] || ''
+  const accept = headers.accept || ''
+
+  let suspiciousCount = 0
+  const issues: string[] = []
+
+  // Check for basic browser headers
+  if (!accept) {
+    suspiciousCount++
+    issues.push('missing-accept-header')
+  }
+
+  if (!acceptLanguage) {
+    suspiciousCount++
+    issues.push('missing-accept-language')
+  }
+
+  if (!acceptEncoding) {
+    suspiciousCount++
+    issues.push('missing-accept-encoding')
+  }
+
+  // Check for inconsistencies
+  if (userAgent.includes('Chrome') && !acceptEncoding.includes('gzip')) {
+    suspiciousCount++
+    issues.push('chrome-without-gzip')
+  }
+
+  if (userAgent.includes('Mozilla') && !userAgent.includes('Gecko') && !userAgent.includes('WebKit')) {
+    suspiciousCount++
+    issues.push('invalid-mozilla-signature')
+  }
+
+  // Score based on number of issues
+  if (suspiciousCount >= 3) {
+    return { score: 35, reason: `header-inconsistency: ${issues.join(', ')}` }
+  }
+
+  if (suspiciousCount >= 2) {
+    return { score: 20, reason: `header-issues: ${issues.join(', ')}` }
+  }
+
+  if (suspiciousCount >= 1) {
+    return { score: 10, reason: `minor-header-issue: ${issues.join(', ')}` }
+  }
+
+  return { score: 0, reason: 'consistent-headers' }
+}
+
+/**
+ * Browser fingerprinting analysis.
+ * Very complex, high chance of false positives.
+ *
+ * @param headers - Request headers keyed by lower-case name.
+ * NOTE(review): the type arguments were missing from `Record`; string values that may be
+ * absent match how the body reads them - confirm against the caller.
+ * @param _sessionData - Unused.
+ * @returns Score 25 for a very low fingerprint "entropy", 15 when more than five headers
+ * arrive without both `user-agent` and `accept`, otherwise 0.
+ */
+export function analyzeBrowserFingerprint(
+  headers: Record<string, string | undefined>,
+  _sessionData: SessionData,
+): { score: number, reason: string } {
+  const userAgent = headers['user-agent'] || ''
+  const acceptLanguage = headers['accept-language'] || ''
+
+  // Calculate "fingerprint entropy"
+  let entropy = 0
+  const features: string[] = []
+
+  // User agent entropy
+  if (userAgent) {
+    entropy += Math.log2(userAgent.length)
+    features.push('user-agent')
+  }
+
+  // Language entropy
+  if (acceptLanguage) {
+    const languages = acceptLanguage.split(',').length
+    entropy += Math.log2(languages + 1)
+    features.push('languages')
+  }
+
+  // Client hints
+  if (headers['sec-ch-ua']) {
+    entropy += 2
+    features.push('client-hints')
+  }
+
+  // DNT header
+  if (headers.dnt) {
+    entropy += 1
+    features.push('dnt')
+  }
+
+  // Very low entropy suggests a bot
+  if (entropy < 3 && features.length < 2) {
+    return { score: 25, reason: `low-browser-entropy: ${entropy.toFixed(1)}` }
+  }
+
+  // Check for header order consistency (browsers typically send headers in specific orders)
+  // NOTE: this simplified version only checks that both headers are present, not their order
+  const headerOrder = Object.keys(headers)
+  if (headerOrder.length > 5) {
+    const hasStandardOrder = headerOrder.includes('user-agent')
+      && headerOrder.includes('accept')
+    if (!hasStandardOrder) {
+      return { score: 15, reason: 'unusual-header-order' }
+    }
+  }
+
+  return { score: 0, reason: 'normal-browser-fingerprint' }
+}
diff --git a/libs/is-bot/src/core.ts b/libs/is-bot/src/core.ts
new file mode 100644
index 00000000..2e41e3c1
--- /dev/null
+++ b/libs/is-bot/src/core.ts
@@ -0,0 +1,359 @@
+// Core bot detection engine - H3/Nuxt focused
+import type { H3Event } from 'h3'
+import type {
+ BotDetectionConfig,
+ BotDetectionRequest,
+ BotDetectionResponse,
+ DetectionContext,
+ ResponseStatusProvider,
+ SessionIdentifier,
+ SiteProfile
+} from './types'
+import type { IPData, SessionData } from './behavior'
+import type { BehaviorStorage } from './adapters/behavior-storage'
+import { modularBotAnalysis, DEFAULT_BEHAVIOR_CONFIG, type BotDetectionBehaviorConfig } from './modular-analyzer'
+import { type BotDetectionBehavior, TrafficType } from './behavior'
+
+export class BotDetectionEngine {
+ private storage: BehaviorStorage
+ private sessionIdentifier: SessionIdentifier
+ private responseStatusProvider?: ResponseStatusProvider
+ private config: BotDetectionConfig
+ private behaviorConfig: BotDetectionBehaviorConfig
+ private siteProfile: SiteProfile | null = null
+
+ /**
+  * Stores the injected collaborators and builds the effective configuration by
+  * layering caller-supplied values over built-in defaults.
+  *
+  * @param options - `storage` and `sessionIdentifier` are required;
+  * `responseStatusProvider` and `config` are optional.
+  */
+ constructor(options: {
+ storage: BehaviorStorage
+ sessionIdentifier: SessionIdentifier
+ responseStatusProvider?: ResponseStatusProvider
+ config?: BotDetectionConfig
+ }) {
+ this.storage = options.storage
+ this.sessionIdentifier = options.sessionIdentifier
+ this.responseStatusProvider = options.responseStatusProvider
+ // In each group the caller's object is spread last, so any key it contains
+ // replaces the default listed above it
+ this.config = {
+ session: {
+ ttl: 24 * 60 * 60 * 1000, // 24 hours
+ maxSessionsPerIP: 10,
+ ...options.config?.session
+ },
+ thresholds: {
+ definitelyBot: 90,
+ likelyBot: 70,
+ suspicious: 40,
+ ...options.config?.thresholds
+ },
+ customSensitivePaths: options.config?.customSensitivePaths || [],
+ ipFilter: {
+ // A caller-supplied trustedIPs list replaces these loopback defaults rather than extending them
+ trustedIPs: ['127.0.0.1', '::1'],
+ blockedIPs: [],
+ ...options.config?.ipFilter
+ },
+ debug: options.config?.debug || false,
+ behaviors: options.config?.behaviors
+ }
+
+ // Merge behavior configuration
+ // The merge is shallow per tier: a caller entry replaces the default entry with the same key
+ this.behaviorConfig = {
+ simple: { ...DEFAULT_BEHAVIOR_CONFIG.simple, ...this.config.behaviors?.simple },
+ intermediate: { ...DEFAULT_BEHAVIOR_CONFIG.intermediate, ...this.config.behaviors?.intermediate },
+ advanced: { ...DEFAULT_BEHAVIOR_CONFIG.advanced, ...this.config.behaviors?.advanced }
+ }
+ }
+
+ /**
+  * Analyzes one request: resolves the session, short-circuits on the IP block and
+  * trust lists, scores the request, updates the session and IP records, and persists them.
+  *
+  * @param request - Request description (IP, path, method, headers, optional timestamp).
+  * @param event - H3 event; when given, the modular analysis runs, otherwise a basic fallback score is used.
+  * @returns The detection verdict for this request.
+  */
+ async analyze(request: BotDetectionRequest, event?: H3Event): Promise<BotDetectionResponse> {
+   const timestamp = request.timestamp || Date.now()
+
+   // Get session ID
+   const sessionId = await this.sessionIdentifier.getSessionId(request)
+
+   // Check IP blocklist/allowlist; the block list is consulted first
+   if (this.isIPBlocked(request.ip)) {
+     return this.createBlockedResponse(sessionId, 'ip-blocked')
+   }
+
+   if (this.isIPTrusted(request.ip)) {
+     return this.createTrustedResponse(sessionId, 'ip-trusted')
+   }
+
+   // Get or create session and IP data
+   const [sessionData, ipData] = await Promise.all([
+     this.getOrCreateSession(sessionId, timestamp),
+     this.getOrCreateIPData(request.ip, sessionId, timestamp)
+   ])
+
+   // Get or create site profile
+   this.siteProfile = await this.getOrCreateSiteProfile(request)
+
+   // Create behavior object for analysis
+   const behavior: BotDetectionBehavior = {
+     id: sessionId,
+     session: sessionData,
+     ip: ipData,
+     dirty: false
+   }
+
+   // Run modular analysis - requires H3Event
+   // Note: the analysis runs before the current request is appended to the session history
+   let analysis
+   if (event) {
+     analysis = modularBotAnalysis({
+       event,
+       behavior,
+       config: this.behaviorConfig,
+       debug: this.config.debug
+     })
+   } else {
+     // Fallback analysis without H3Event
+     analysis = {
+       botScore: this.basicBotScore(request, sessionData, ipData),
+       confidence: 50,
+       factors: [],
+       recommendation: 'allow' as const
+     }
+   }
+
+   // Update session data
+   this.updateSessionData(sessionData, request, timestamp)
+
+   // Update IP data
+   this.updateIPData(ipData, sessionData, timestamp)
+
+   // Apply response status if available
+   if (this.responseStatusProvider) {
+     const status = this.responseStatusProvider.getStatus(request)
+     if (status) {
+       this.applyResponseStatus(sessionData, status)
+     }
+   }
+
+   // Save updated data
+   await Promise.all([
+     this.storage.setSession(sessionId, sessionData),
+     this.storage.setIP(request.ip, ipData),
+     this.siteProfile ? this.storage.setSiteProfile(this.siteProfile) : Promise.resolve()
+   ])
+
+   return {
+     isBot: analysis.botScore >= (this.config.thresholds?.likelyBot || 70),
+     confidence: analysis.confidence,
+     score: analysis.botScore,
+     factors: analysis.factors,
+     recommendation: analysis.recommendation,
+     sessionId
+   }
+ }
+
+ /** Reports whether `ip` appears in the configured block list; false when no list is configured. */
+ private isIPBlocked(ip: string): boolean {
+   const blockedIPs = this.config.ipFilter?.blockedIPs
+   return blockedIPs ? blockedIPs.includes(ip) : false
+ }
+
+ /** Reports whether `ip` appears in the configured trust list; false when no list is configured. */
+ private isIPTrusted(ip: string): boolean {
+   const trustedIPs = this.config.ipFilter?.trustedIPs
+   return trustedIPs ? trustedIPs.includes(ip) : false
+ }
+
+ /**
+  * Loads the stored session, or builds a blank one when storage has none.
+  * A new session is only returned here, not written to storage.
+  *
+  * @param sessionId - Storage key of the session.
+  * @param timestamp - Used as `firstSeenAt` and `lastUpdated` of a new session.
+  * @returns The stored session, or a fresh one with zeroed counters.
+  */
+ private async getOrCreateSession(sessionId: string, timestamp: number): Promise<SessionData> {
+   const existing = await this.storage.getSession(sessionId)
+   if (existing) {
+     return existing
+   }
+
+   return {
+     lastRequests: [],
+     suspiciousPathHits: 0,
+     maybeSensitivePathHits: 0,
+     uniqueSensitivePathsAccessed: [],
+     errorCount: 0,
+     score: 0,
+     lastScore: 0,
+     lastUpdated: timestamp,
+     knownGoodActions: 0,
+     requestMethodVariety: [],
+     requestSequenceEntropy: 0,
+     firstSeenAt: timestamp,
+     behaviorChangePoints: [],
+     trafficType: TrafficType.UNKNOWN
+   }
+ }
+
+ /**
+  * Loads the stored record for an IP, registering the session on it, or builds a
+  * new record that tracks only this session.
+  *
+  * @param ip - Storage key of the IP record.
+  * @param sessionId - Session to register on the record.
+  * @param timestamp - Used as `lastUpdated` and `lastSessionCreated` of a new record.
+  * @returns The stored record (mutated in place) or a fresh one.
+  */
+ private async getOrCreateIPData(ip: string, sessionId: string, timestamp: number): Promise<IPData> {
+   const existing = await this.storage.getIP(ip)
+   if (existing) {
+     // Add session if not already tracked
+     // NOTE(review): the list only ever grows here; `session.maxSessionsPerIP` is not enforced in this method
+     if (!existing.activeSessions.includes(sessionId)) {
+       existing.activeSessions.push(sessionId)
+       existing.sessionCount = existing.activeSessions.length
+     }
+     return existing
+   }
+
+   return {
+     sessionCount: 1,
+     activeSessions: [sessionId],
+     suspiciousScore: 0,
+     lastUpdated: timestamp,
+     legitSessionsCount: 0,
+     // NOTE(review): spelling presumably follows the IPData declaration - confirm before renaming
+     factores: [],
+     isBot: false,
+     isBotConfidence: 0,
+     lastSessionCreated: timestamp
+   }
+ }
+
+ /**
+  * Loads the stored site profile, or builds an empty one when storage has none.
+  *
+  * @param request - Currently unused.
+  * @returns The stored profile, or a blank profile with `detectedCMS` set to 'unknown'.
+  */
+ private async getOrCreateSiteProfile(request: BotDetectionRequest): Promise<SiteProfile> {
+   const existing = await this.storage.getSiteProfile()
+   if (existing) {
+     return existing
+   }
+
+   return {
+     detectedCMS: 'unknown',
+     hasAdminArea: false,
+     adminPaths: [],
+     apiEndpoints: [],
+     // NOTE(review): Set and Map do not survive plain JSON serialization - confirm the storage adapter handles them
+     existingPaths: new Set(),
+     userAgentPatterns: new Map(),
+     legitimateAccessPatterns: []
+   }
+ }
+
+ /**
+  * Appends the current request to the session history (capped at 30 entries) and
+  * records its HTTP method if the session has not used it before.
+  *
+  * @param sessionData - Session to mutate in place.
+  * @param request - Supplies the path and method of the current request.
+  * @param timestamp - Time of the current request.
+  */
+ private updateSessionData(sessionData: SessionData, request: BotDetectionRequest, timestamp: number) {
+   const history = sessionData.lastRequests
+   const previous = history[history.length - 1]
+
+   // Add current request; the very first request has no predecessor, so its gap is 0
+   history.push({
+     timestamp,
+     path: request.path,
+     method: request.method,
+     timeSincePrevious: previous ? timestamp - previous.timestamp : 0
+   })
+
+   // Keep only last 30 requests
+   if (history.length > 30) {
+     history.shift()
+   }
+
+   // Update method variety
+   const seenMethods = sessionData.requestMethodVariety
+   const isNewMethod = !seenMethods.includes(request.method)
+   if (isNewMethod) {
+     seenMethods.push(request.method)
+   }
+
+   sessionData.lastUpdated = timestamp
+ }
+
+ /**
+  * Refreshes the per-IP record from the current session score. Mutates
+  * `ipData` in place.
+  *
+  * - `suspiciousScore` becomes the larger of the previous score decayed by 10%
+  *   and 80% of the session score.
+  * - `isBotConfidence` is the mean of the session score and the new IP score.
+  * - `isBot` is set when the session score reaches `thresholds.likelyBot`
+  *   (70 when not configured).
+  */
+ private updateIPData(ipData: IPData, sessionData: SessionData, timestamp: number) {
+   ipData.lastUpdated = timestamp
+
+   const decayedScore = ipData.suspiciousScore * 0.9
+   const sessionInfluence = sessionData.score * 0.8
+   ipData.suspiciousScore = Math.max(decayedScore, sessionInfluence)
+
+   ipData.isBotConfidence = (sessionData.score + ipData.suspiciousScore) / 2
+
+   // `??` instead of `||` so an explicitly configured threshold of 0 is honoured
+   const likelyBotThreshold = this.config.thresholds?.likelyBot ?? 70
+   ipData.isBot = sessionData.score >= likelyBotThreshold
+ }
+
+ /**
+  * Folds an HTTP response status into the session. Mutates `sessionData`.
+  *
+  * The status is attached to the most recent request. Statuses >= 400 bump
+  * `errorCount` and, from the third error on, add `min(15, errorCount * 2)` to
+  * the score. 2xx statuses lower the score by 1 (not below 0) and add 0.5 to
+  * `knownGoodActions`. The score is finally capped at 100.
+  */
+ private applyResponseStatus(sessionData: SessionData, status: number) {
+   const history = sessionData.lastRequests
+   if (history.length > 0) {
+     history[history.length - 1].status = status
+   }
+
+   const isError = status >= 400
+   const isSuccess = status >= 200 && status < 300
+
+   if (isError) {
+     sessionData.errorCount++
+
+     // The first two errors are tolerated without a penalty
+     if (sessionData.errorCount > 2) {
+       sessionData.score += Math.min(15, sessionData.errorCount * 2)
+     }
+   } else if (isSuccess) {
+     sessionData.score = Math.max(0, sessionData.score - 1)
+     sessionData.knownGoodActions += 0.5
+   }
+
+   sessionData.score = Math.min(100, sessionData.score)
+ }
+
+ /**
+  * Computes a heuristic bot score in the range 0-100 from four signals:
+  * a missing or short (< 20 chars) user agent (+30), a user agent matching
+  * known bot keywords (+50), more than 10 tracked requests averaging under one
+  * second apart (+25), and a request path containing a sensitive path (+20).
+  * `ipData` is currently not consulted.
+  */
+ private basicBotScore(request: BotDetectionRequest, sessionData: SessionData, ipData: IPData): number {
+   const rawUserAgent = request.headers['user-agent']
+   const userAgent = Array.isArray(rawUserAgent)
+     ? rawUserAgent[0] || ''
+     : rawUserAgent || ''
+
+   let total = 0
+
+   if (!userAgent || userAgent.length < 20) {
+     total += 30
+   }
+
+   if (/bot|crawler|spider|scraper|curl|wget|python-requests/i.test(userAgent)) {
+     total += 50
+   }
+
+   // Simple rate check: average gap between consecutive tracked requests
+   const history = sessionData.lastRequests
+   if (history.length > 10) {
+     let gapSum = 0
+     for (let i = 1; i < history.length; i++) {
+       gapSum = gapSum + (history[i].timestamp - history[i - 1].timestamp)
+     }
+     const avgInterval = gapSum / Math.max(1, history.length - 1)
+
+     if (avgInterval < 1000) {
+       total += 25
+     }
+   }
+
+   const touchesSensitivePath = ['/admin', '/wp-admin', '/.env', '/wp-login']
+     .some(fragment => request.path.includes(fragment))
+   if (touchesSensitivePath) {
+     total += 20
+   }
+
+   return Math.min(100, total)
+ }
+
+ /** Builds the fixed "block" verdict (score 100, confidence 100) carrying `reason` as an IP_FILTER factor. */
+ private createBlockedResponse(sessionId: string, reason: string): BotDetectionResponse {
+   const ipFilterFactor = { type: 'IP_FILTER', score: 100, reason }
+
+   return {
+     isBot: true,
+     confidence: 100,
+     score: 100,
+     factors: [ipFilterFactor],
+     recommendation: 'block',
+     sessionId
+   }
+ }
+
+ /** Builds the fixed "allow" verdict (score 0, confidence 100) carrying `reason` as an IP_FILTER factor. */
+ private createTrustedResponse(sessionId: string, reason: string): BotDetectionResponse {
+   const ipFilterFactor = { type: 'IP_FILTER', score: -100, reason }
+
+   return {
+     isBot: false,
+     confidence: 100,
+     score: 0,
+     factors: [ipFilterFactor],
+     recommendation: 'allow',
+     sessionId
+   }
+ }
+
+ // Public configuration methods
+ /** Shallow-merges `config` over the current configuration; nested objects are replaced, not merged. */
+ updateConfig(config: Partial) {
+   this.config = Object.assign({}, this.config, config)
+ }
+
+ /**
+  * Merges `config` into the behavior configuration one tier at a time
+  * (`simple`, `intermediate`, `advanced`), so keys omitted from a tier keep
+  * their current values.
+  */
+ updateBehaviorConfig(config: Partial) {
+   const { simple, intermediate, advanced } = this.behaviorConfig
+
+   this.behaviorConfig = {
+     simple: { ...simple, ...config.simple },
+     intermediate: { ...intermediate, ...config.intermediate },
+     advanced: { ...advanced, ...config.advanced }
+   }
+ }
+
+ // Cleanup method
+ /** Delegates to the storage adapter's `cleanup` hook when the adapter provides one. */
+ async cleanup() {
+   if (!this.storage.cleanup) {
+     return
+   }
+   await this.storage.cleanup()
+ }
+}
\ No newline at end of file
diff --git a/libs/is-bot/src/drivers/h3.ts b/libs/is-bot/src/drivers/h3.ts
new file mode 100644
index 00000000..9517502f
--- /dev/null
+++ b/libs/is-bot/src/drivers/h3.ts
@@ -0,0 +1,146 @@
+// H3 driver for bot detection
+import type { H3Event } from 'h3'
+import { getHeaders, getRequestIP, getResponseStatus, useSession } from 'h3'
+import type {
+ BotDetectionDriver,
+ BotDetectionRequestData,
+ BotDetectionDriverOptions
+} from './types'
+
+/**
+ * Bot-detection driver that reads request, session and response data from an
+ * h3 event.
+ */
+export class H3Driver implements BotDetectionDriver {
+  /**
+   * Password used when the caller supplies none.
+   *
+   * h3 seals session cookies with this value and, to my knowledge, rejects
+   * passwords shorter than 32 characters (the previous 30-character default
+   * would then make every `useSession` call throw, so the driver would always
+   * use the IP/user-agent fallback); verify against the installed h3 version.
+   * NOTE(review): a hard-coded password is publicly known; deployments should
+   * always provide their own `sessionConfig.password`.
+   */
+  private static readonly DEFAULT_SESSION_PASSWORD = 'default-bot-detection-password-change-me'
+
+  /** Loopback addresses plus the RFC 1918 private IPv4 ranges. */
+  private static readonly TRUSTED_RANGES: readonly string[] = [
+    '127.0.0.1',
+    '::1',
+    '10.0.0.0/8',
+    '172.16.0.0/12',
+    '192.168.0.0/16'
+  ]
+
+  private options: BotDetectionDriverOptions
+
+  /**
+   * @param options Driver options; omitted values fall back to the defaults
+   * below (`trustProxy: true`, cookie name `nuxt-session`, `debug: false`).
+   */
+  constructor(options: BotDetectionDriverOptions = {}) {
+    this.options = {
+      sessionConfig: {
+        password: H3Driver.DEFAULT_SESSION_PASSWORD,
+        cookieName: 'nuxt-session',
+        ...options.sessionConfig
+      },
+      ipExtraction: {
+        trustProxy: true,
+        proxyHeaders: ['x-forwarded-for', 'x-real-ip'],
+        ...options.ipExtraction
+      },
+      debug: options.debug || false
+    }
+  }
+
+  /** Collects path, method, headers, client IP and a few common headers from `event`. */
+  extractRequest(event: H3Event): BotDetectionRequestData {
+    const headers = getHeaders(event)
+    const ip = this.resolveIP(event)
+
+    const userAgent = this.getHeaderValue(headers, 'user-agent')
+    const referer = this.getHeaderValue(headers, 'referer') || this.getHeaderValue(headers, 'referrer')
+    const acceptLanguage = this.getHeaderValue(headers, 'accept-language')
+    const acceptEncoding = this.getHeaderValue(headers, 'accept-encoding')
+
+    return {
+      path: event.path || '/',
+      method: event.method || 'GET',
+      headers,
+      ip,
+      timestamp: Date.now(),
+      userAgent,
+      referer,
+      acceptLanguage,
+      acceptEncoding
+    }
+  }
+
+  /**
+   * Returns the h3 session id for `event`. If the session cannot be
+   * established, falls back to a deterministic id derived from the client IP
+   * and user agent (the error is logged only when `debug` is on).
+   */
+  async extractSessionId(event: H3Event): Promise {
+    try {
+      const cookieName = this.options.sessionConfig?.cookieName
+      const session = await useSession(event, {
+        password: this.options.sessionConfig?.password || H3Driver.DEFAULT_SESSION_PASSWORD,
+        // Honour the configured cookie name; previously this option was ignored
+        ...(cookieName ? { name: cookieName } : {})
+      })
+      return session.id
+    } catch (error) {
+      if (this.options.debug) {
+        console.warn('Failed to get session, falling back to IP-based session:', error)
+      }
+      // Fallback to IP + User Agent hash
+      return this.generateFallbackSessionId(event)
+    }
+  }
+
+  /** Returns the response status currently set on `event`, or `undefined` if reading it throws. */
+  extractResponseStatus(event: H3Event): number | undefined {
+    try {
+      return getResponseStatus(event)
+    } catch {
+      return undefined
+    }
+  }
+
+  /**
+   * Reports whether `ip` is a loopback address or lies inside one of the
+   * private IPv4 ranges in `TRUSTED_RANGES`.
+   *
+   * CIDR entries are matched numerically on the full prefix length, so e.g.
+   * `10.200.3.4` matches `10.0.0.0/8` and `172.32.0.1` does not match
+   * `172.16.0.0/12`. IPv4-mapped IPv6 addresses (`::ffff:a.b.c.d`) are matched
+   * by their embedded IPv4 address.
+   */
+  isTrustedIP(ip: string): boolean {
+    const candidate = ip.toLowerCase().startsWith('::ffff:') ? ip.slice(7) : ip
+
+    return H3Driver.TRUSTED_RANGES.some((range) => {
+      if (range.includes('/')) {
+        return H3Driver.isInCidr(candidate, range)
+      }
+      return candidate === range
+    })
+  }
+
+  /** Returns protocol, host, connection and the `sec-fetch-*` / `sec-ch-ua*` request headers. */
+  getAdditionalContext(event: H3Event): Record {
+    const headers = getHeaders(event)
+
+    return {
+      protocol: headers['x-forwarded-proto'] || 'http',
+      host: headers.host,
+      connection: headers.connection,
+      upgradeInsecureRequests: headers['upgrade-insecure-requests'],
+      secFetchSite: headers['sec-fetch-site'],
+      secFetchMode: headers['sec-fetch-mode'],
+      secFetchUser: headers['sec-fetch-user'],
+      secFetchDest: headers['sec-fetch-dest'],
+      secChUa: headers['sec-ch-ua'],
+      secChUaMobile: headers['sec-ch-ua-mobile'],
+      secChUaPlatform: headers['sec-ch-ua-platform']
+    }
+  }
+
+  /** Returns header `name`, taking the first entry when the value is an array. */
+  private getHeaderValue(headers: Record, name: string): string | undefined {
+    const value = headers[name]
+    if (Array.isArray(value)) {
+      return value[0]
+    }
+    return value
+  }
+
+  /**
+   * Resolves the client IP, consulting `x-forwarded-for` when `trustProxy` is
+   * set.
+   * NOTE(review): an undeterminable IP is reported as `127.0.0.1`, which
+   * `isTrustedIP` accepts - confirm this is the intended fallback.
+   */
+  private resolveIP(event: H3Event): string {
+    return getRequestIP(event, {
+      xForwardedFor: this.options.ipExtraction?.trustProxy
+    }) || '127.0.0.1'
+  }
+
+  /** Builds a deterministic `fallback-<hash>` session id from the client IP and user agent. */
+  private generateFallbackSessionId(event: H3Event): string {
+    const headers = getHeaders(event)
+    const ip = this.resolveIP(event)
+    const userAgent = this.getHeaderValue(headers, 'user-agent') || ''
+
+    return `fallback-${this.simpleHash(`${ip}-${userAgent}`)}`
+  }
+
+  /** Non-cryptographic 32-bit string hash (`hash * 31 + charCode`), rendered in base 36. */
+  private simpleHash(str: string): string {
+    let hash = 0
+    for (let i = 0; i < str.length; i++) {
+      const char = str.charCodeAt(i)
+      hash = ((hash << 5) - hash) + char
+      hash = hash & hash // Convert to 32-bit integer
+    }
+    return Math.abs(hash).toString(36)
+  }
+
+  /** Parses a dotted-quad IPv4 address into its numeric value, or `undefined` if malformed. */
+  private static parseIPv4(ip: string): number | undefined {
+    const parts = ip.split('.')
+    if (parts.length !== 4) {
+      return undefined
+    }
+
+    let value = 0
+    for (const part of parts) {
+      if (!/^\d{1,3}$/.test(part)) {
+        return undefined
+      }
+      const octet = Number(part)
+      if (octet > 255) {
+        return undefined
+      }
+      // Plain arithmetic avoids the sign issues of 32-bit bitwise operators
+      value = value * 256 + octet
+    }
+    return value
+  }
+
+  /** Reports whether IPv4 address `ip` falls inside `cidr` (`a.b.c.d/bits`). */
+  private static isInCidr(ip: string, cidr: string): boolean {
+    const [base = '', bitsText = ''] = cidr.split('/')
+    if (!/^\d{1,2}$/.test(bitsText)) {
+      return false
+    }
+    const bits = Number(bitsText)
+    const ipValue = H3Driver.parseIPv4(ip)
+    const baseValue = H3Driver.parseIPv4(base)
+    if (ipValue === undefined || baseValue === undefined || bits > 32) {
+      return false
+    }
+
+    // Two addresses share a /bits prefix when they land in the same block
+    const blockSize = 2 ** (32 - bits)
+    return Math.floor(ipValue / blockSize) === Math.floor(baseValue / blockSize)
+  }
+}
+
+// Convenience function for quick H3 integration
+/** Creates an `H3Driver` configured with `options` (defaults apply when omitted). */
+export function createH3BotDetection(options: BotDetectionDriverOptions = {}) {
+  const driver = new H3Driver(options)
+  return driver
+}
\ No newline at end of file
diff --git a/libs/is-bot/src/drivers/types.ts b/libs/is-bot/src/drivers/types.ts
new file mode 100644
index 00000000..b1ca7895
--- /dev/null
+++ b/libs/is-bot/src/drivers/types.ts
@@ -0,0 +1,51 @@
+// Driver interface types for bot detection
+/**
+ * Contract a framework adapter implements so bot detection can read request,
+ * session and response information from that framework's own objects.
+ * Members marked optional (`?`) may be omitted by an adapter.
+ */
+export interface BotDetectionDriver {
+ /**
+ * Extract bot detection request data from the framework's request object
+ */
+ extractRequest(request: TRequest): BotDetectionRequestData
+
+ /**
+ * Extract session ID from the framework's request
+ * (may be returned directly or asynchronously)
+ */
+ extractSessionId(request: TRequest): Promise | string
+
+ /**
+ * Extract response status from the framework's response (if available)
+ */
+ extractResponseStatus?(request: TRequest, response?: TResponse): number | undefined
+
+ /**
+ * Check if IP is from a trusted source (e.g., load balancer)
+ */
+ isTrustedIP?(ip: string): boolean
+
+ /**
+ * Get additional context from the framework
+ */
+ getAdditionalContext?(request: TRequest): Record
+}
+
+/**
+ * Framework-independent snapshot of a single request, as produced by a
+ * driver's `extractRequest`.
+ */
+export interface BotDetectionRequestData {
+ // Request path
+ path: string
+ // HTTP method
+ method: string
+ // Request headers
+ headers: Record
+ // Client IP address
+ ip: string
+ // Time associated with the request - presumably epoch milliseconds; confirm against callers
+ timestamp: number
+ // Optional convenience copies of commonly inspected header values
+ userAgent?: string
+ referer?: string
+ acceptLanguage?: string
+ acceptEncoding?: string
+}
+
+/** Options accepted by bot-detection drivers; every field is optional. */
+export interface BotDetectionDriverOptions {
+ // Session handling settings
+ sessionConfig?: {
+ // Password handed to the framework's session mechanism
+ password?: string
+ // Name of the session cookie
+ cookieName?: string
+ }
+ // How the client IP is determined
+ ipExtraction?: {
+ // Whether proxy-supplied forwarding headers may be trusted
+ trustProxy?: boolean
+ // Header names that may carry the forwarded client IP
+ proxyHeaders?: string[]
+ }
+ // Enables diagnostic logging in the driver
+ debug?: boolean
+}
\ No newline at end of file
diff --git a/libs/is-bot/src/enhanced-analyzer.ts b/libs/is-bot/src/enhanced-analyzer.ts
new file mode 100644
index 00000000..dcb8c0a6
--- /dev/null
+++ b/libs/is-bot/src/enhanced-analyzer.ts
@@ -0,0 +1,543 @@
+// Enhanced bot detection analyzer with strict, context-aware heuristics
+import type { H3Event } from 'h3'
+import type { BotDetectionBehavior, SessionData } from './behavior'
+import type { ImprovedDetectionContext, SiteProfile } from './improved-behavior'
+import { getHeaders } from 'h3'
+import {
+ analyzeUserIntent,
+ buildSiteProfile,
+ IMPROVED_BEHAVIOR_WEIGHTS,
+
+ scorePathAccess,
+
+ updateCredibilityScore,
+} from './improved-behavior'
+
+// Global site profile (in production, this should be persisted)
+// Module-level mutable holder: `enhancedBotAnalysis` replaces `.value` on every
+// call, so the profile is shared by all requests handled by this module instance.
+const globalSiteProfile: { value: SiteProfile | null } = { value: null }
+
+// Enhanced bot score thresholds (more strict)
+// `determineRecommendation` reads DEFINITELY_BOT, LIKELY_BOT and DEFINITELY_HUMAN
+// and adds offsets to the first two when confidence is lower.
+export const ENHANCED_THRESHOLDS = {
+ DEFINITELY_BOT: 80, // Lowered from 90 - we're more confident now
+ LIKELY_BOT: 60, // Lowered from 70 - better precision
+ SUSPICIOUS: 35, // Lowered from 40 - catch more edge cases
+ PROBABLY_HUMAN: 15, // Lowered from 20 - positive scoring allows this
+ DEFINITELY_HUMAN: -10, // NEW: Negative scores for highly trusted users
+}
+
+// Enhanced analysis with context awareness
+/**
+ * Scores a request for bot likelihood using the session history in
+ * `behavior` and the headers/path of `event`.
+ *
+ * Refreshes the shared site profile, derives a detection context (intent,
+ * access pattern, auth status, referrer, technical profile), then adds up the
+ * individual scorers. Every non-zero contribution is recorded in `factors`
+ * in evaluation order. `timestamp` and `debug` are accepted but unused.
+ *
+ * @returns the total score rounded to one decimal, a 0-100 confidence, the
+ * contributing factors, the derived context and a recommendation.
+ */
+export function enhancedBotAnalysis({
+  event,
+  behavior,
+  timestamp: _timestamp = Date.now(),
+  debug: _debug = false,
+}: {
+  event: H3Event
+  behavior: BotDetectionBehavior
+  timestamp?: number
+  debug?: boolean
+}): {
+    botScore: number
+    confidence: number
+    factors: Array<{ type: string, score: number, reason: string }>
+    context: ImprovedDetectionContext
+    recommendation: 'allow' | 'challenge' | 'block'
+  } {
+  const requestPath = event.path || ''
+  const _method = event.method || 'GET'
+  const requestHeaders = getHeaders(event)
+  const { session: sessionData, ip: _ipData } = behavior
+
+  // Build/update the shared site profile and keep a local handle to it
+  const siteProfile = buildSiteProfile(event, globalSiteProfile.value || undefined)
+  globalSiteProfile.value = siteProfile
+
+  // Intent is inferred from the session's request history
+  const userIntent = analyzeUserIntent(sessionData.lastRequests)
+
+  const context: ImprovedDetectionContext = {
+    userIntent: userIntent as 'browsing' | 'exploring' | 'scanning' | 'exploiting' | 'unknown',
+    accessPattern: analyzeAccessPattern(sessionData) as 'human-like' | 'systematic' | 'random' | 'malicious',
+    credibilityScore: sessionData.knownGoodActions * 5, // Convert to 0-100 scale
+    authenticationStatus: detectAuthStatus(requestHeaders, requestPath) as 'authenticated' | 'anonymous' | 'unknown',
+    referrerContext: analyzeReferrer(requestHeaders) as 'internal' | 'search-engine' | 'direct' | 'suspicious',
+    technicalProfile: analyzeTechnicalProfile(requestHeaders, sessionData),
+  }
+
+  const factors: Array<{ type: string, score: number, reason: string }> = []
+  let totalScore = 0
+
+  // Records a contribution; zero scores leave both the list and the total untouched
+  const addFactor = (type: string, score: number, reason: string): void => {
+    if (score === 0)
+      return
+    factors.push({ type, score, reason })
+    totalScore += score
+  }
+
+  // 1. Context-aware path analysis
+  const pathAnalysis = scorePathAccess(requestPath, siteProfile!, context)
+  addFactor('PATH_ACCESS', pathAnalysis.score, pathAnalysis.reason)
+
+  // 2. Intent-based scoring
+  addFactor(
+    'USER_INTENT',
+    scoreUserIntent(context.userIntent, sessionData),
+    `Intent detected as: ${context.userIntent}`,
+  )
+
+  // 3. Positive scoring for good behavior
+  addFactor(
+    'POSITIVE_BEHAVIOR',
+    scorePositiveBehavior(context, sessionData, siteProfile!),
+    'Legitimate user behavior patterns detected',
+  )
+
+  // 4. Technical profile analysis
+  addFactor(
+    'TECHNICAL_PROFILE',
+    scoreTechnicalProfile(context.technicalProfile),
+    'Technical fingerprint analysis',
+  )
+
+  // 5. Rate limiting with context
+  addFactor(
+    'RATE_LIMITING',
+    scoreRateLimiting(sessionData, context, siteProfile!),
+    'Request rate analysis',
+  )
+
+  // 6. Enhanced timing analysis
+  addFactor(
+    'TIMING_ANALYSIS',
+    scoreTimingPatterns(sessionData, context),
+    'Request timing pattern analysis',
+  )
+
+  // 7. Credibility bonus/penalty: 0.2 points per credibility point away from 50
+  const newCredibilityScore = updateCredibilityScore(
+    context.credibilityScore,
+    sessionData,
+    context,
+  )
+  addFactor(
+    'CREDIBILITY_ADJUSTMENT',
+    (newCredibilityScore - 50) * 0.2,
+    `User credibility: ${newCredibilityScore}/100`,
+  )
+
+  const confidence = calculateConfidence(sessionData, factors.length)
+  const recommendation = determineRecommendation(totalScore, confidence, context)
+  const botScore = Math.round(totalScore * 10) / 10 // Round to 1 decimal
+
+  return { botScore, confidence, factors, context, recommendation }
+}
+
+/**
+ * Classifies the session's request history as `'human-like'`, `'systematic'`,
+ * `'random'` or `'unknown'`. Histories shorter than 3 requests are `'unknown'`.
+ * "Human-like" requires varied timing, a logical path flow and a natural
+ * error rate all at once.
+ */
+function analyzeAccessPattern(sessionData: SessionData): string {
+  const history = sessionData.lastRequests
+  if (history.length < 3)
+    return 'unknown'
+
+  const looksHuman = checkTimingVariation(history)
+    && checkLogicalFlow(history)
+    && checkNaturalErrorPattern(history)
+  if (looksHuman)
+    return 'human-like'
+
+  if (checkSystematicAccess(history))
+    return 'systematic'
+
+  // Mostly-unique paths with no other structure (possible bot)
+  return checkRandomPattern(history) ? 'random' : 'unknown'
+}
+
+/**
+ * Reports whether the gaps between consecutive requests vary enough to look
+ * human: the coefficient of variation (std-dev / mean) must exceed 0.3.
+ * Fewer than 3 requests count as varied.
+ */
+function checkTimingVariation(requests: Array<{ timestamp: number }>): boolean {
+  if (requests.length < 3)
+    return true
+
+  const gaps = requests
+    .slice(1)
+    .map((request, index) => request.timestamp - requests[index].timestamp)
+
+  let gapTotal = 0
+  for (const gap of gaps)
+    gapTotal += gap
+  const meanGap = gapTotal / gaps.length
+
+  let squaredDeviation = 0
+  for (const gap of gaps)
+    squaredDeviation += (gap - meanGap) ** 2
+  const spread = Math.sqrt(squaredDeviation / gaps.length)
+
+  // A zero mean yields NaN here, which compares false
+  return spread / meanGap > 0.3
+}
+
+/**
+ * Reports whether more than half of the requested paths look like ordinary
+ * site navigation: the root, a top-level section, or a nested page.
+ * An empty history yields false.
+ */
+function checkLogicalFlow(requests: Array<{ path: string }>): boolean {
+  const navigationShapes = [
+    /^\/$/, // Start at home
+    /^\/[^/]+$/, // Go to main section
+    /^\/[^/]+\/[^/]+/, // Go deeper
+  ]
+
+  let navigationLike = 0
+  for (const { path } of requests) {
+    if (navigationShapes.some(shape => shape.test(path)))
+      navigationLike++
+  }
+
+  // Strictly more than 50% must match
+  return navigationLike / requests.length > 0.5
+}
+
+/**
+ * Reports whether the share of error responses (status >= 400) lies strictly
+ * between 5% and 30%, the band treated as natural for human visitors.
+ * Requests without a status do not count as errors.
+ */
+function checkNaturalErrorPattern(requests: Array<{ status?: number }>): boolean {
+  let failures = 0
+  for (const request of requests) {
+    if (request.status && request.status >= 400)
+      failures++
+  }
+
+  const failureShare = failures / requests.length
+  const aboveFloor = failureShare > 0.05
+  const belowCeiling = failureShare < 0.3
+  return aboveFloor && belowCeiling
+}
+
+/**
+ * Detects enumeration of numeric path segments: true when at least three
+ * paths contain a `/<digits>` segment and, once sorted, those numbers form a
+ * run of consecutive integers.
+ */
+function checkSystematicAccess(requests: Array<{ path: string }>): boolean {
+  const ids: number[] = []
+  for (const { path } of requests) {
+    const found = path.match(/\/(\d+)/)
+    if (found)
+      ids.push(Number.parseInt(found[1]))
+  }
+
+  if (ids.length < 3)
+    return false
+
+  const ascending = [...ids].sort((a, b) => a - b)
+  for (let i = 1; i < ascending.length; i++) {
+    if (ascending[i] !== ascending[i - 1] + 1)
+      return false
+  }
+  return true
+}
+
+/**
+ * Reports whether the history looks like random access: more than 5 requests
+ * of which over 90% target distinct paths.
+ */
+function checkRandomPattern(requests: Array<{ path: string }>): boolean {
+  const total = requests.length
+  const distinct = new Set(requests.map(request => request.path)).size
+
+  const mostlyUnique = distinct / total > 0.9
+  return mostlyUnique && total > 5
+}
+
+/**
+ * Guesses the authentication status of a request.
+ *
+ * Any non-empty `authorization`, `cookie` or `x-auth-token` header counts as
+ * `'authenticated'`; otherwise login/auth paths yield `'unknown'` and
+ * everything else `'anonymous'`.
+ * NOTE(review): the presence of any cookie is treated as proof of
+ * authentication - confirm that is intended.
+ */
+function detectAuthStatus(headers: Record, path: string): string {
+  for (const name of ['authorization', 'cookie', 'x-auth-token']) {
+    if (headers[name])
+      return 'authenticated'
+  }
+
+  const isAuthEndpoint = path.includes('/login') || path.includes('/auth')
+  return isAuthEndpoint ? 'unknown' : 'anonymous'
+}
+
+/**
+ * Classifies the request's referrer as `'direct'` (none sent),
+ * `'search-engine'`, `'internal'` (same site as the `Host` header) or
+ * `'external'`.
+ *
+ * The referrer is parsed as a URL and matched on its host name, so a search
+ * engine domain that merely appears in the path or query string of another
+ * site no longer counts. Unparseable referrers are `'external'`.
+ */
+function analyzeReferrer(headers: Record): string {
+  const referrer = headers.referer || headers.referrer || ''
+
+  if (!referrer)
+    return 'direct'
+
+  let referrerUrl: URL
+  try {
+    referrerUrl = new URL(referrer)
+  }
+  catch {
+    // Not an absolute URL: nothing trustworthy can be derived from it
+    return 'external'
+  }
+
+  const referrerHostname = referrerUrl.hostname
+  const searchEngineDomains = ['google.com', 'bing.com', 'duckduckgo.com']
+  const isSearchEngine = searchEngineDomains.some(
+    domain => referrerHostname === domain || referrerHostname.endsWith(`.${domain}`),
+  )
+  if (isSearchEngine)
+    return 'search-engine'
+
+  // The Host header carries the port when it is non-default, so compare with
+  // `host` (hostname:port) first; `hostname` alone keeps the previous matches.
+  const currentHost = String(headers.host || '').toLowerCase()
+  if (currentHost && (referrerUrl.host === currentHost || referrerHostname === currentHost))
+    return 'internal'
+
+  return 'external'
+}
+
+/**
+ * Builds the technical profile of a client: which browser features its
+ * headers advertise (`language`, `compression`, `client-hints`), plus the
+ * network-consistency and header-credibility scores.
+ */
+function analyzeTechnicalProfile(headers: Record, sessionData: SessionData) {
+  const acceptLanguage = headers['accept-language'] || ''
+  const acceptEncoding = headers['accept-encoding'] || ''
+
+  // Feature names in the order they are reported
+  const featureChecks: Array<[string, boolean]> = [
+    ['language', Boolean(acceptLanguage)],
+    ['compression', Boolean(acceptEncoding.includes('gzip'))],
+    ['client-hints', Boolean(headers['sec-ch-ua'])],
+  ]
+  const browserFeatures = featureChecks
+    .filter(([, present]) => present)
+    .map(([feature]) => feature)
+
+  return {
+    browserFeatures,
+    networkConsistency: calculateNetworkConsistency(sessionData),
+    headerCredibility: calculateHeaderCredibility(headers),
+  }
+}
+
+/**
+ * Returns a 0-1 value derived from the variance of the gaps between
+ * consecutive requests (`variance / 1000`, capped at 1). Histories shorter
+ * than 5 requests return the neutral value 0.5.
+ */
+function calculateNetworkConsistency(sessionData: SessionData): number {
+  const history = sessionData.lastRequests
+  if (history.length < 5)
+    return 0.5 // Neutral
+
+  const gaps = history
+    .slice(1)
+    .map((request, index) => request.timestamp - history[index].timestamp)
+
+  let gapTotal = 0
+  for (const gap of gaps)
+    gapTotal += gap
+  const meanGap = gapTotal / gaps.length
+
+  let squaredDeviation = 0
+  for (const gap of gaps)
+    squaredDeviation += (gap - meanGap) ** 2
+  const gapVariance = squaredDeviation / gaps.length
+
+  return Math.min(1, gapVariance / 1000) // Normalize to 0-1
+}
+
+/**
+ * Rates how browser-like a header set is on a 0-1 scale, starting from a
+ * neutral 0.5. Chrome-style user agents, multiple accepted languages, client
+ * hints and Brotli support raise the rating; a missing or short user agent
+ * and a missing `accept` header lower it.
+ */
+function calculateHeaderCredibility(headers: Record): number {
+  const userAgent = headers['user-agent'] || ''
+  const acceptLanguage = headers['accept-language'] || ''
+
+  // [applies, delta] pairs, applied in order so rounding matches step by step
+  const adjustments: Array<[unknown, number]> = [
+    [userAgent.includes('Mozilla/') && userAgent.includes('Chrome/'), 0.2],
+    [acceptLanguage.includes(','), 0.1], // Multiple languages
+    [headers['sec-ch-ua'], 0.1], // Modern browser
+    [headers['accept-encoding']?.includes('br'), 0.1], // Brotli support
+    [!userAgent, -0.3],
+    [userAgent.length < 20, -0.2], // Too short
+    [!headers.accept, -0.2],
+  ]
+
+  let rating = 0.5 // Start neutral
+  for (const [applies, delta] of adjustments) {
+    if (applies)
+      rating += delta
+  }
+
+  return Math.max(0, Math.min(1, rating))
+}
+
+/**
+ * Maps a detected intent to its score weight: browsing earns the
+ * good-navigation weight, scanning and exploiting their respective weights,
+ * and every other intent (including `'exploring'`) is neutral.
+ */
+function scoreUserIntent(intent: string, _sessionData: SessionData): number {
+  if (intent === 'browsing')
+    return IMPROVED_BEHAVIOR_WEIGHTS.GOOD_NAVIGATION
+  if (intent === 'scanning')
+    return IMPROVED_BEHAVIOR_WEIGHTS.SYSTEMATIC_ENUMERATION
+  if (intent === 'exploiting')
+    return IMPROVED_BEHAVIOR_WEIGHTS.VULNERABILITY_PROBE
+
+  // 'exploring' is legitimate exploration; anything else is unknown
+  return 0
+}
+
+/**
+ * Sums the weights for signs of legitimate use: a search-engine referrer,
+ * header credibility above 0.7, authenticated access, and an average gap
+ * between requests of 10 s to 2 min (suggesting reading). `_siteProfile` is
+ * unused.
+ */
+function scorePositiveBehavior(
+  context: ImprovedDetectionContext,
+  sessionData: SessionData,
+  _siteProfile: SiteProfile,
+): number {
+  const averageGap = sessionData.averageTimeBetweenRequests || 0
+
+  const rewards: Array<[boolean, number]> = [
+    [context.referrerContext === 'search-engine', IMPROVED_BEHAVIOR_WEIGHTS.LEGITIMATE_REFERRER],
+    [context.technicalProfile.headerCredibility > 0.7, IMPROVED_BEHAVIOR_WEIGHTS.PROPER_HEADERS],
+    [context.authenticationStatus === 'authenticated', IMPROVED_BEHAVIOR_WEIGHTS.AUTHENTICATED_ACCESS],
+    [averageGap > 10000 && averageGap < 120000, IMPROVED_BEHAVIOR_WEIGHTS.CONTENT_ENGAGEMENT],
+  ]
+
+  let reward = 0
+  for (const [earned, weight] of rewards) {
+    if (earned)
+      reward += weight
+  }
+  return reward
+}
+
+/**
+ * Penalises weak technical fingerprints: header credibility below 0.3 adds
+ * the suspicious-user-agent weight, and fewer than two detected browser
+ * features adds 10.
+ */
+function scoreTechnicalProfile(profile: any): number {
+  const credibilityPenalty = profile.headerCredibility < 0.3
+    ? IMPROVED_BEHAVIOR_WEIGHTS.SUSPICIOUS_USER_AGENT
+    : 0
+  const featurePenalty = profile.browserFeatures.length < 2 ? 10 : 0
+
+  return credibilityPenalty + featurePenalty
+}
+
+/**
+ * Penalises sessions that exceed a context-dependent per-minute request
+ * limit. The limit is 5 for scanners, 25 for browsing on sites with API
+ * endpoints, 30 for authenticated users and 15 otherwise (first match wins in
+ * that order). The penalty is 5 per request over the limit, capped at the
+ * API-abuse weight. The one-minute window is measured from the current clock.
+ */
+function scoreRateLimiting(
+  sessionData: SessionData,
+  context: ImprovedDetectionContext,
+  siteProfile: SiteProfile,
+): number {
+  const windowStart = Date.now() - 60000
+  let recentCount = 0
+  for (const request of sessionData.lastRequests) {
+    if (request.timestamp > windowStart)
+      recentCount++
+  }
+
+  const isAuthenticated = context.authenticationStatus === 'authenticated'
+  const browsingApiSite = siteProfile.apiEndpoints.length > 0 && context.userIntent === 'browsing'
+  const isScanning = context.userIntent === 'scanning'
+
+  let allowedPerMinute: number
+  if (isScanning)
+    allowedPerMinute = 5 // Very strict for scanners
+  else if (browsingApiSite)
+    allowedPerMinute = 25
+  else if (isAuthenticated)
+    allowedPerMinute = 30
+  else
+    allowedPerMinute = 15 // Default
+
+  if (recentCount <= allowedPerMinute)
+    return 0
+
+  return Math.min(IMPROVED_BEHAVIOR_WEIGHTS.API_ABUSE, (recentCount - allowedPerMinute) * 5)
+}
+
+/**
+ * Penalises machine-regular request timing, but only when the intent is
+ * already suspicious: 30 for scanners with near-constant gaps averaging under
+ * 2 s, 40 for exploit attempts with low gap variation. Needs at least 5
+ * requests.
+ */
+function scoreTimingPatterns(sessionData: SessionData, context: ImprovedDetectionContext): number {
+  const history = sessionData.lastRequests
+  if (history.length < 5)
+    return 0
+
+  const gaps = history
+    .slice(1)
+    .map((request, index) => request.timestamp - history[index].timestamp)
+
+  let gapTotal = 0
+  for (const gap of gaps)
+    gapTotal += gap
+  const meanGap = gapTotal / gaps.length
+
+  let squaredDeviation = 0
+  for (const gap of gaps)
+    squaredDeviation += (gap - meanGap) ** 2
+  const variation = Math.sqrt(squaredDeviation / gaps.length) / meanGap
+
+  const roboticScan = variation < 0.05 && meanGap < 2000 && context.userIntent === 'scanning'
+  if (roboticScan)
+    return 30 // High penalty for robotic timing + suspicious intent
+
+  const steadyExploit = variation < 0.1 && context.userIntent === 'exploiting'
+  return steadyExploit ? 40 : 0
+}
+
+/**
+ * Estimates (0-100) how much evidence backs a verdict: up to 50 points for
+ * request volume (5 each), up to 30 for minutes elapsed since the session
+ * was first seen, and up to 20 for the number of contributing factors
+ * (4 each).
+ */
+function calculateConfidence(sessionData: SessionData, factorCount: number): number {
+  const observedMs = Date.now() - sessionData.firstSeenAt
+
+  const volumePart = Math.min(50, sessionData.lastRequests.length * 5)
+  const durationPart = Math.min(30, observedMs / (60000)) // Minutes observed
+  const factorPart = Math.min(20, factorCount * 4)
+
+  return Math.min(100, volumePart + durationPart + factorPart)
+}
+
+/**
+ * Turns a score into an action, demanding a higher score the less confident
+ * the analysis is.
+ *
+ * - confidence > 80: block at DEFINITELY_BOT, challenge at LIKELY_BOT
+ * - confidence > 50: block at DEFINITELY_BOT + 10, challenge at LIKELY_BOT + 5
+ * - otherwise:       block at DEFINITELY_BOT + 20, challenge at LIKELY_BOT + 15
+ *
+ * Everything below the challenge threshold is allowed. `_context` is unused.
+ */
+function determineRecommendation(
+  score: number,
+  confidence: number,
+  _context: ImprovedDetectionContext,
+): 'allow' | 'challenge' | 'block' {
+  // Extra margin required on top of the base thresholds for this confidence tier
+  let blockMargin = 20
+  let challengeMargin = 15
+  if (confidence > 80) {
+    blockMargin = 0
+    challengeMargin = 0
+  }
+  else if (confidence > 50) {
+    blockMargin = 10
+    challengeMargin = 5
+  }
+
+  if (score >= ENHANCED_THRESHOLDS.DEFINITELY_BOT + blockMargin)
+    return 'block'
+  if (score >= ENHANCED_THRESHOLDS.LIKELY_BOT + challengeMargin)
+    return 'challenge'
+
+  return 'allow'
+}
+
+export { globalSiteProfile }
diff --git a/libs/is-bot/src/improved-behavior.ts b/libs/is-bot/src/improved-behavior.ts
new file mode 100644
index 00000000..f79ce3d7
--- /dev/null
+++ b/libs/is-bot/src/improved-behavior.ts
@@ -0,0 +1,303 @@
+// Improved bot detection with context-aware heuristics
+import type { H3Event } from 'h3'
+import { getHeaders, getResponseStatus } from 'h3'
+
+// Smart path analysis that adapts to the actual site
+// Accumulated knowledge about the protected site; passed to the path, positive
+// behavior and rate-limit scorers.
+export interface SiteProfile {
+ // Detected platform, if any
+ detectedCMS?: 'wordpress' | 'drupal' | 'nuxt' | 'next' | 'unknown'
+ // Whether the site is known to have an admin area
+ hasAdminArea: boolean
+ // Paths belonging to the admin area
+ adminPaths: string[]
+ // Known API endpoints (a non-empty list raises the rate limit for browsing users)
+ apiEndpoints: string[]
+ // Paths known to exist on the site
+ existingPaths: Set
+ // Observed user agent patterns - NOTE(review): key/value meaning not visible here
+ userAgentPatterns: Map
+ // Access patterns considered legitimate
+ legitimateAccessPatterns: string[]
+}
+
+// Context-aware scoring that considers intent
+// Per-request context derived by the analyzer before scoring.
+export interface ImprovedDetectionContext {
+ // Intent inferred from the session's request history
+ userIntent: 'browsing' | 'exploring' | 'scanning' | 'exploiting' | 'unknown'
+ // Shape of the access pattern across recent requests
+ accessPattern: 'human-like' | 'systematic' | 'random' | 'malicious'
+ credibilityScore: number // 0-100, builds over time
+ // Whether the request appears to carry credentials
+ authenticationStatus: 'authenticated' | 'anonymous' | 'unknown'
+ // Where the visitor came from, based on the referrer header
+ referrerContext: 'internal' | 'search-engine' | 'direct' | 'suspicious'
+ // Fingerprint derived from request headers and request timing
+ technicalProfile: {
+ // Names of browser capabilities advertised by the headers
+ browserFeatures: string[]
+ // 0-1 value derived from request-gap variance
+ networkConsistency: number
+ // 0-1 rating of how browser-like the headers are
+ headerCredibility: number
+ }
+}
+
+// Improved scoring that rewards good behavior
+// Score deltas applied by the scorers: negative values lower the bot score,
+// positive values raise it.
+export const IMPROVED_BEHAVIOR_WEIGHTS = {
+ // Positive factors (reduce bot score)
+ GOOD_NAVIGATION: -10, // Following logical navigation paths
+ CONTENT_ENGAGEMENT: -15, // Time spent reading content
+ LEGITIMATE_REFERRER: -5, // Coming from search engines/legitimate sites
+ PROPER_HEADERS: -8, // Complete, consistent header set
+ AUTHENTICATED_ACCESS: -20, // Successfully authenticated users
+
+ // Negative factors (increase bot score)
+ NONEXISTENT_PATH_SCAN: 25, // Scanning for paths that don't exist
+ CREDENTIAL_STUFFING: 50, // Multiple login attempts
+ VULNERABILITY_PROBE: 40, // Testing for known vulnerabilities
+ RAPID_ERROR_GENERATION: 30, // Generating many errors quickly
+ SUSPICIOUS_USER_AGENT: 20, // User agent inconsistencies
+
+ // Context-dependent factors
+ ADMIN_ACCESS_UNAUTHENTICATED: 35, // Accessing admin without auth
+ API_ABUSE: 25, // Excessive API calls without proper usage
+ SYSTEMATIC_ENUMERATION: 30, // Clear enumeration patterns
+}
+
+// Intent recognition based on request patterns
+/**
+ * Infers what a visitor is doing from the paths of their last 10 requests.
+ *
+ * Checks run in a fixed order and the first hit wins: logical navigation
+ * (`'browsing'`), systematic enumeration (`'scanning'`), exploit payloads
+ * (`'exploiting'`), then curious exploration (`'exploring'`). Fewer than 3
+ * requests, or no hit at all, yields `'unknown'`.
+ */
+export function analyzeUserIntent(requests: Array<{ path: string, timestamp: number, status?: number }>): string {
+  if (requests.length < 3)
+    return 'unknown'
+
+  const recentPaths = requests.map(request => request.path).slice(-10)
+
+  // Ordered detectors: earlier entries take precedence
+  const detectors: Array<[(paths: string[]) => boolean, string]> = [
+    [checkLogicalProgression, 'browsing'],
+    [checkSystematicPattern, 'scanning'],
+    [checkExploitationPattern, 'exploiting'],
+    [checkExplorationPattern, 'exploring'],
+  ]
+
+  for (const [detect, intent] of detectors) {
+    if (detect(recentPaths))
+      return intent
+  }
+
+  return 'unknown'
+}
+
+/**
+ * Reports whether the first requests follow one of a few known navigation
+ * sequences (home -> blog, home -> products, search -> article -> home).
+ *
+ * Step `i` of a sequence matches when `paths[i]` contains the step's text;
+ * steps beyond the end of `paths` are treated as matching.
+ */
+function checkLogicalProgression(paths: string[]): boolean {
+  const knownJourneys = [
+    ['/', '/blog', '/blog/'], // Home to blog
+    ['/', '/products', '/products/'], // Home to products
+    ['/search', '/article/', '/'], // Search to content
+  ]
+
+  for (const journey of knownJourneys) {
+    let followsJourney = true
+    for (let step = 0; step < journey.length && step < paths.length; step++) {
+      if (!paths[step].includes(journey[step])) {
+        followsJourney = false
+        break
+      }
+    }
+    if (followsJourney)
+      return true
+  }
+
+  return false
+}
+
+/**
+ * Detects systematic scanning in a list of request paths.
+ *
+ * Two patterns are recognised:
+ * - numeric enumeration: at least three paths carry a whole numeric segment
+ *   (`/user/1`, `/user/2`, ...) and the numbers, once sorted, are consecutive;
+ * - alphabetical enumeration: the final path segments (query string removed)
+ *   were requested in ascending order and include at least four distinct
+ *   names (`/admin`, `/backup`, `/config`, `/debug`).
+ *
+ * Names are compared element by element, so repeated visits to the same page
+ * are not mistaken for an alphabetical sweep.
+ */
+function checkSystematicPattern(paths: string[]): boolean {
+  // Numeric sequence scanning
+  const ids: number[] = []
+  for (const path of paths) {
+    const found = /\/(\d+)(?:\/|$)/.exec(path)
+    if (found)
+      ids.push(Number.parseInt(found[1], 10))
+  }
+
+  if (ids.length >= 3) {
+    const ascendingIds = [...ids].sort((a, b) => a - b)
+    const isSequential = ascendingIds.every((id, i) => i === 0 || id === ascendingIds[i - 1] + 1)
+    if (isSequential)
+      return true
+  }
+
+  // Alphabetical scanning over the last path segment
+  const names: string[] = []
+  for (const path of paths) {
+    const name = path.split('/').pop()?.split('?')[0]
+    if (name)
+      names.push(name)
+  }
+
+  if (new Set(names).size >= 4) {
+    const requestedInOrder = names.every((name, i) => i === 0 || names[i - 1] <= name)
+    if (requestedInOrder)
+      return true
+  }
+
+  return false
+}
+
+function checkExploitationPattern(paths: string[]): boolean {
+ // High-confidence malicious patterns:
+ // - SQL injection attempts
+ // - XSS probe attempts
+ // - Directory traversal
+ // - Known vulnerability scanners
+
+ const exploitPatterns = [
+ /['"]\s*(union|select|insert|update|delete)/i, // SQL injection
+ /