-
Notifications
You must be signed in to change notification settings - Fork 83
Add method to track an AI bot request, if the current user agent is a known AI bot #148
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 12 commits
eaeb3d8
70e7048
adb1b0b
2a1772d
24a1cd2
af83c1b
d45ae3f
81ec770
5a1e17f
0bbf3b1
483de93
d40e1df
0004dff
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,6 +32,17 @@ class MatomoTracker | |
| */ | ||
| static public $URL = ''; | ||
|
|
||
| public const AI_BOT_USER_AGENT_SUBSTRINGS = [ | ||
| 'ChatGPT-User', | ||
| 'MistralAI-User', | ||
| 'Gemini-Deep-Research', | ||
| 'Claude-User', | ||
| 'Perplexity-User', | ||
| 'Google-NotebookLM', | ||
| 'Devin', | ||
| 'GPTBot', | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not sure about this one either. I don’t see any reference to it in the Matomo code base.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. GPTBot is listed here: https://platform.openai.com/docs/bots. If it still shouldn't be tracked, let me know. |
||
| ]; | ||
|
|
||
| /** | ||
| * API Version | ||
| * | ||
|
|
@@ -161,11 +172,11 @@ class MatomoTracker | |
|
|
||
| // Visitor Ids in order | ||
| public $userId = false; | ||
|
|
||
| public $forcedVisitorId = false; | ||
|
|
||
| public $cookieVisitorId = false; | ||
|
|
||
| public $randomVisitorId = false; | ||
|
|
||
| public $configCookiesDisabled = false; | ||
|
|
@@ -186,11 +197,11 @@ class MatomoTracker | |
|
|
||
| // Allow debug while blocking the request | ||
| public $requestTimeout = 600; | ||
|
|
||
| public $requestConnectTimeout = 300; | ||
|
|
||
| public $doBulkRequests = false; | ||
|
|
||
| public $storedTrackingActions = []; | ||
|
|
||
| public $sendImageResponse = true; | ||
|
|
@@ -238,7 +249,7 @@ public function __construct(int $idSite, string $apiUrl = '') | |
|
|
||
| $this->currentTs = time(); | ||
| $this->createTs = $this->currentTs; | ||
|
|
||
| $this->visitorCustomVar = $this->getCustomVariablesFromCookie(); | ||
| } | ||
|
|
||
|
|
@@ -735,7 +746,7 @@ public function enableBulkTracking(): void | |
| } | ||
|
|
||
| /** | ||
| * Disables the bulk request feature. Make sure to call `doBulkTrack()` before disabling it if you have stored | ||
| * Disables the bulk request feature. Make sure to call `doBulkTrack()` before disabling it if you have stored | ||
| * tracking actions previously as this method won't be sending any previously stored actions before disabling it. | ||
| */ | ||
| public function disableBulkTracking(): void | ||
|
|
@@ -833,6 +844,24 @@ public function doTrackPageView(string $documentTitle) | |
| return $this->sendRequest($url); | ||
| } | ||
|
|
||
/**
 * Tracks a pageview for the current request, but only when the tracker's user agent
 * is recognised as belonging to an AI agent bot.
 *
 * Intended for server-side use, since these bots do not execute JavaScript.
 *
 * @param int|null $httpStatus the request's HTTP status code, if it is known.
 * @param int|null $responseSizeBytes the size of the response sent to the AI bot, if known.
 * @param int|null $serverTimeMs the number of milliseconds it took to process the request, if known.
 * @param string|null $source passed through unchanged to `getUrlTrackAIBot()`.
 * @return mixed Null when the user agent is not a known AI bot. Otherwise the result of
 *               `sendRequest()`: the response string, or true if using bulk requests.
 */
public function doTrackPageViewIfAIBot(?int $httpStatus = null, ?int $responseSizeBytes = null, ?int $serverTimeMs = null, ?string $source = null)
{
    if (self::isUserAgentAIBot($this->userAgent)) {
        return $this->sendRequest(
            $this->getUrlTrackAIBot($httpStatus, $responseSizeBytes, $serverTimeMs, $source)
        );
    }

    // Not an AI bot: nothing is tracked and no request is sent.
    return null;
}
|
|
||
| /** | ||
| * Override PageView id for every use of `doTrackPageView()`. Do not use this if you call `doTrackPageView()` | ||
| * multiple times during tracking (if, for example, you are tracking a single page application). | ||
|
|
@@ -847,7 +876,7 @@ public function setPageviewId(string $idPageview): void | |
| * Returns the PageView id. If the id was manually set using `setPageViewId()`, that id will be returned. | ||
| * If the id was not set manually, the id that was automatically generated in last `doTrackPageView()` will | ||
| * be returned. If there was no last page view, this will be false. | ||
| * | ||
| * | ||
| * @return string|false The PageView id as string or false if there is none yet. | ||
| */ | ||
| public function getPageviewId() | ||
|
|
@@ -891,7 +920,7 @@ public function doTrackEvent( | |
| public function doTrackContentImpression( | ||
| string $contentName, | ||
| string $contentPiece = 'Unknown', | ||
| $contentTarget = false | ||
| $contentTarget = false | ||
| ) { | ||
| $url = $this->getUrlTrackContentImpression($contentName, $contentPiece, $contentTarget); | ||
|
|
||
|
|
@@ -1215,6 +1244,40 @@ private function forceDotAsSeparatorForDecimalPoint($value): string | |
| return str_replace(',', '.', $value); | ||
| } | ||
|
|
||
/**
 * Builds a URL to track a request from an AI bot.
 *
 * The URL starts from `getRequest()` for the configured site and always carries `recMode=1`.
 * Optional values are appended only when they were provided. `null` is the "unknown" marker;
 * a response size or server time of 0 is a real measurement (for example an empty response
 * body) and is therefore sent instead of being silently dropped.
 *
 * @param int|null $httpStatus the request's HTTP status code, if it is known.
 * @param int|null $responseSizeBytes the size of the response sent to the AI bot, if known.
 * @param int|null $serverTimeMs the number of milliseconds it took to process the request, if known.
 * @param string|null $source value sent URL-encoded as the `source` parameter; skipped when null or empty.
 * @return string the tracking request URL.
 */
public function getUrlTrackAIBot(?int $httpStatus = null, ?int $responseSizeBytes = null, ?int $serverTimeMs = null, ?string $source = null): string
{
    $url = $this->getRequest($this->idSite);

    $url .= '&recMode=1';

    // A status of 0 carries no information, so it is treated the same as "unknown".
    if ($httpStatus !== null && $httpStatus !== 0) {
        $url .= '&http_status=' . $httpStatus;
    }

    // Explicit null checks: empty() would also discard a legitimate 0.
    if ($responseSizeBytes !== null) {
        $url .= '&bw_bytes=' . $responseSizeBytes;
    }

    if ($serverTimeMs !== null) {
        $url .= '&pf_srv=' . $serverTimeMs;
    }

    // Compare against '' rather than using empty(), which would also reject the string '0'.
    if ($source !== null && $source !== '') {
        $url .= '&source=' . rawurlencode($source);
    }

    return $url;
}
|
|
||
| /** | ||
| * Returns URL used to track Ecommerce Cart updates | ||
| * Calling this function will reinitializes the property ecommerceItems to empty array | ||
|
|
@@ -1362,7 +1425,7 @@ public function getUrlTrackEvent( | |
| public function getUrlTrackContentImpression( | ||
| string $contentName, | ||
| string $contentPiece, | ||
| $contentTarget | ||
| $contentTarget | ||
| ): string { | ||
| $url = $this->getRequest($this->idSite); | ||
|
|
||
|
|
@@ -1876,7 +1939,7 @@ public function setRequestTimeout(int $timeout) | |
|
|
||
| return $this; | ||
| } | ||
|
|
||
| /** | ||
| * Returns the maximum number of seconds the tracker will spend trying to connect to Matomo. | ||
| * Defaults to 300 seconds. | ||
|
|
@@ -1904,7 +1967,7 @@ public function setRequestConnectTimeout(int $timeout) | |
| return $this; | ||
| } | ||
|
|
||
| /** | ||
| /** | ||
| * Sets the request method to POST, which is recommended when using setTokenAuth() | ||
| * to prevent the token from being recorded in server logs. Avoid using redirects | ||
| * when using POST to prevent the loss of POST values. When using Log Analytics, | ||
|
|
@@ -1957,7 +2020,7 @@ private function getProxy(): ?string | |
| protected function prepareCurlOptions( | ||
| string $url, | ||
| string $method, | ||
| $data, | ||
| $data, | ||
| bool $forcePostUrlEncoded | ||
| ): array { | ||
| $options = [ | ||
|
|
@@ -2374,7 +2437,7 @@ protected static function getCurrentScriptName(): string | |
| if (empty($url) && isset($_SERVER['SCRIPT_NAME'])) { | ||
| $url = $_SERVER['SCRIPT_NAME']; | ||
| } elseif (empty($url)) { | ||
| $url = '/'; | ||
| $url = '/'; | ||
| } | ||
|
|
||
| if (!empty($url) && $url[0] !== '/') { | ||
|
|
@@ -2443,9 +2506,9 @@ protected static function getCurrentQueryString(): string | |
| protected static function getCurrentUrl(): string | ||
| { | ||
| return self::getCurrentScheme() . '://' | ||
| . self::getCurrentHost() | ||
| . self::getCurrentScriptName() | ||
| . self::getCurrentQueryString(); | ||
| . self::getCurrentHost() | ||
| . self::getCurrentScriptName() | ||
| . self::getCurrentQueryString(); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -2576,6 +2639,26 @@ protected function parseIncomingCookies(array $headers): void | |
| } | ||
| } | ||
| } | ||
|
|
||
/**
 * Checks whether a user agent string identifies a known AI bot.
 *
 * The user agent matches when it contains, case-insensitively, any entry of
 * `self::AI_BOT_USER_AGENT_SUBSTRINGS`. An empty user agent never matches.
 *
 * @param string $userAgent the user agent string to inspect.
 * @return bool true if one of the known AI bot substrings occurs in the user agent.
 */
public static function isUserAgentAIBot(string $userAgent): bool
{
    $isAIBot = false;

    if (!empty($userAgent)) {
        foreach (self::AI_BOT_USER_AGENT_SUBSTRINGS as $needle) {
            if (false !== stripos($userAgent, $needle)) {
                $isAIBot = true;
                break;
            }
        }
    }

    return $isAIBot;
}
| } | ||
|
|
||
| /** | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.