Skip to content
Open
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 101 additions & 18 deletions MatomoTracker.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,17 @@ class MatomoTracker
*/
static public $URL = '';

public const AI_BOT_USER_AGENT_SUBSTRINGS = [
'ChatGPT-User',
'MistralAI-User',
'Gemini-Deep-Research',
'Claude-User',
'Perplexity-User',
'Google-NotebookLM',
'Devin',
Comment thread
diosmosis marked this conversation as resolved.
Outdated
'GPTBot',

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure about this one either. I don’t see any reference to it in the Matomo codebase.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GPTBot is listed here: https://platform.openai.com/docs/bots

If it still shouldn't be tracked, let me know.

];

/**
* API Version
*
Expand Down Expand Up @@ -161,11 +172,11 @@ class MatomoTracker

// Visitor Ids in order
public $userId = false;

public $forcedVisitorId = false;

public $cookieVisitorId = false;

public $randomVisitorId = false;

public $configCookiesDisabled = false;
Expand All @@ -186,11 +197,11 @@ class MatomoTracker

// Allow debug while blocking the request
public $requestTimeout = 600;

public $requestConnectTimeout = 300;

public $doBulkRequests = false;

public $storedTrackingActions = [];

public $sendImageResponse = true;
Expand Down Expand Up @@ -238,7 +249,7 @@ public function __construct(int $idSite, string $apiUrl = '')

$this->currentTs = time();
$this->createTs = $this->currentTs;

$this->visitorCustomVar = $this->getCustomVariablesFromCookie();
}

Expand Down Expand Up @@ -735,7 +746,7 @@ public function enableBulkTracking(): void
}

/**
* Disables the bulk request feature. Make sure to call `doBulkTrack()` before disabling it if you have stored
* Disables the bulk request feature. Make sure to call `doBulkTrack()` before disabling it if you have stored
* tracking actions previously as this method won't be sending any previously stored actions before disabling it.
*/
public function disableBulkTracking(): void
Expand Down Expand Up @@ -833,6 +844,24 @@ public function doTrackPageView(string $documentTitle)
return $this->sendRequest($url);
}

/**
 * Tracks a pageview action, but only when the tracker's current user agent is
 * recognised as an AI agent bot by `isUserAgentAIBot()`.
 *
 * This method should be used server side to track AI bots that do not execute
 * JavaScript.
 *
 * @param int|null $httpStatus the request's HTTP status code, if it is known.
 * @param int|null $responseSizeBytes the size of the response sent to the AI bot, if known.
 * @param int|null $serverTimeMs the number of milliseconds it took to process the request, if known.
 * @param string|null $source passed through unchanged to `getUrlTrackAIBot()`.
 * @return mixed Null when the user agent is not an AI bot. Otherwise the result of
 *               `sendRequest()`: the response string, or true if using bulk requests.
 */
public function doTrackPageViewIfAIBot(?int $httpStatus = null, ?int $responseSizeBytes = null, ?int $serverTimeMs = null, ?string $source = null)
{
    $isAIBot = self::isUserAgentAIBot($this->userAgent);

    if ($isAIBot) {
        // Build the tracking URL first, then hand it to the regular request pipeline.
        $trackingUrl = $this->getUrlTrackAIBot($httpStatus, $responseSizeBytes, $serverTimeMs, $source);

        return $this->sendRequest($trackingUrl);
    }

    // Regular visitors are not tracked by this method.
    return null;
}

/**
* Override PageView id for every use of `doTrackPageView()`. Do not use this if you call `doTrackPageView()`
* multiple times during tracking (if, for example, you are tracking a single page application).
Expand All @@ -847,7 +876,7 @@ public function setPageviewId(string $idPageview): void
* Returns the PageView id. If the id was manually set using `setPageViewId()`, that id will be returned.
* If the id was not set manually, the id that was automatically generated in last `doTrackPageView()` will
* be returned. If there was no last page view, this will be false.
*
*
* @return string|false The PageView id as string or false if there is none yet.
*/
public function getPageviewId()
Expand Down Expand Up @@ -891,7 +920,7 @@ public function doTrackEvent(
public function doTrackContentImpression(
string $contentName,
string $contentPiece = 'Unknown',
$contentTarget = false
$contentTarget = false
) {
$url = $this->getUrlTrackContentImpression($contentName, $contentPiece, $contentTarget);

Expand Down Expand Up @@ -1215,6 +1244,40 @@ private function forceDotAsSeparatorForDecimalPoint($value): string
return str_replace(',', '.', $value);
}

/**
 * Builds a URL to track a request from an AI bot.
 *
 * The URL starts from `getRequest()` for the configured site, always carries `recMode=1`,
 * and gains one query parameter for each optional value that was supplied.
 *
 * For `$responseSizeBytes` and `$serverTimeMs`, `0` is a real measurement (an empty
 * response body, a response served in under a millisecond) and is sent as-is; only
 * `null` means "unknown" and omits the parameter.
 *
 * @param int|null $httpStatus the request's HTTP status code, if it is known. `null` or `0`
 *                             omits `http_status`.
 * @param int|null $responseSizeBytes the size of the response sent to the AI bot, if known.
 *                                    Sent as `bw_bytes`.
 * @param int|null $serverTimeMs the number of milliseconds it took to process the request,
 *                               if known. Sent as `pf_srv`.
 * @param string|null $source sent URL-encoded as `source`. `null` or an empty string omits it.
 * @return string
 */
public function getUrlTrackAIBot(?int $httpStatus = null, ?int $responseSizeBytes = null, ?int $serverTimeMs = null, ?string $source = null): string
{
    $url = $this->getRequest($this->idSite) . '&recMode=1';

    // 0 is not a meaningful HTTP status, so it is treated the same as "unknown".
    if ($httpStatus !== null && $httpStatus !== 0) {
        $url .= '&http_status=' . $httpStatus;
    }

    // Explicit null checks: empty() would silently discard a legitimate 0.
    if ($responseSizeBytes !== null) {
        $url .= '&bw_bytes=' . $responseSizeBytes;
    }

    if ($serverTimeMs !== null) {
        $url .= '&pf_srv=' . $serverTimeMs;
    }

    // Compare against '' rather than using empty(), which would also drop the string '0'.
    if ($source !== null && $source !== '') {
        $url .= '&source=' . rawurlencode($source);
    }

    return $url;
}

/**
* Returns URL used to track Ecommerce Cart updates
* Calling this function reinitializes the property ecommerceItems to an empty array
Expand Down Expand Up @@ -1362,7 +1425,7 @@ public function getUrlTrackEvent(
public function getUrlTrackContentImpression(
string $contentName,
string $contentPiece,
$contentTarget
$contentTarget
): string {
$url = $this->getRequest($this->idSite);

Expand Down Expand Up @@ -1876,7 +1939,7 @@ public function setRequestTimeout(int $timeout)

return $this;
}

/**
* Returns the maximum number of seconds the tracker will spend trying to connect to Matomo.
* Defaults to 300 seconds.
Expand Down Expand Up @@ -1904,7 +1967,7 @@ public function setRequestConnectTimeout(int $timeout)
return $this;
}

/**
/**
* Sets the request method to POST, which is recommended when using setTokenAuth()
* to prevent the token from being recorded in server logs. Avoid using redirects
* when using POST to prevent the loss of POST values. When using Log Analytics,
Expand Down Expand Up @@ -1957,7 +2020,7 @@ private function getProxy(): ?string
protected function prepareCurlOptions(
string $url,
string $method,
$data,
$data,
bool $forcePostUrlEncoded
): array {
$options = [
Expand Down Expand Up @@ -2374,7 +2437,7 @@ protected static function getCurrentScriptName(): string
if (empty($url) && isset($_SERVER['SCRIPT_NAME'])) {
$url = $_SERVER['SCRIPT_NAME'];
} elseif (empty($url)) {
$url = '/';
$url = '/';
}

if (!empty($url) && $url[0] !== '/') {
Expand Down Expand Up @@ -2443,9 +2506,9 @@ protected static function getCurrentQueryString(): string
protected static function getCurrentUrl(): string
{
    // Assemble the URL from its four parts, each contributed exactly once:
    // scheme, host, script path and query string.
    return self::getCurrentScheme() . '://'
        . self::getCurrentHost()
        . self::getCurrentScriptName()
        . self::getCurrentQueryString();
}

/**
Expand Down Expand Up @@ -2576,6 +2639,26 @@ protected function parseIncomingCookies(array $headers): void
}
}
}

/**
 * Checks whether a user agent string identifies one of the known AI bots.
 *
 * The check is a case-insensitive substring search against every entry of
 * `self::AI_BOT_USER_AGENT_SUBSTRINGS`.
 *
 * @param string $userAgent the user agent to inspect.
 * @return bool true as soon as one listed substring is found; false otherwise,
 *              including for an empty user agent.
 */
public static function isUserAgentAIBot(string $userAgent): bool
{
    if (!empty($userAgent)) {
        foreach (self::AI_BOT_USER_AGENT_SUBSTRINGS as $botMarker) {
            if (stripos($userAgent, $botMarker) === false) {
                continue;
            }

            return true;
        }
    }

    return false;
}
}

/**
Expand Down
86 changes: 86 additions & 0 deletions tests/Unit/MatomoTrackerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,92 @@ public function test_setApiUrl()
$this->assertSame(substr($url, 0, strlen($newApiUrl)), $newApiUrl);
}

/**
 * Verifies the AI bot classification of a single user agent supplied by the data provider.
 *
 * @dataProvider getTestDataForIsUserAgentAIBot
 */
public function test_isUserAgentAIBot($userAgent, $expected)
{
    $isAIBot = \MatomoTracker::isUserAgentAIBot($userAgent);

    $this->assertSame($expected, $isAIBot);
}

/**
 * Data sets for `test_isUserAgentAIBot()`: each row is `[user agent, expected result]`.
 *
 * Declared static so the provider can be called without a test case instance, which
 * newer PHPUnit versions require.
 *
 * @return array<int, array{0: string, 1: bool}>
 */
public static function getTestDataForIsUserAgentAIBot(): array
{
    return [
        // An empty user agent is never a bot.
        ['', false],

        // Regular browsers.
        ['Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.3', false],
        ['Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.3', false],

        // A conventional search crawler that is not in the AI bot list.
        ['Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', false],

        // Known AI bots.
        ['Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0; +https://openai.com/bot', true],
        ['Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; GPTBot/1.1; +https://openai.com/gptbot', true],
        ['Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; MistralAI-User/1.0; +https://docs.mistral.ai/robots)', true],
        ['Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Gemini-Deep-Research; +https://gemini.google/overview/deep-research/) Chrome/135.0.0.0 Safari/537.36', true],
        ['Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Claude-User/1.0; +Claude-User@anthropic.com)', true],
        ['Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Perplexity-User/1.0; +https://perplexity.ai/perplexity-user)', true],
        ['Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36; Devin/1.0; +devin.ai', true],

        // Synthetic user agent covering the Google-NotebookLM list entry.
        ['Mozilla/5.0 (compatible; Google-NotebookLM)', true],

        // Matching is case-insensitive.
        ['mozilla/5.0 (compatible; claude-user/1.0)', true],
    ];
}

/**
 * Verifies the tracking URL built by `getUrlTrackAIBot()` for one combination of optional
 * values, after non-deterministic parameters have been normalised.
 *
 * The `HTTP_USER_AGENT` server variable is set for the duration of the test and put back
 * afterwards so other tests do not inherit it.
 *
 * @dataProvider getTestDataForGetUrlTrackAIBot
 */
public function test_getUrlTrackAIBot(?int $httpStatus, ?int $responseSizeBytes, ?int $serverTimeMs, ?string $source, string $expected): void
{
    $hadUserAgent = array_key_exists('HTTP_USER_AGENT', $_SERVER);
    $previousUserAgent = $hadUserAgent ? $_SERVER['HTTP_USER_AGENT'] : null;

    $_SERVER['HTTP_USER_AGENT'] = 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0; +https://openai.com/bot';

    try {
        $tracker = new \MatomoTracker(1, self::TEST_URL);
        $tracker->setUrl('https://example.com/page');
        $tracker->setVisitorId('abcdef01234517ab');

        $actual = $this->normalizeTrackingUrl(
            $tracker->getUrlTrackAIBot($httpStatus, $responseSizeBytes, $serverTimeMs, $source)
        );

        // Both sides are strings, so compare strictly.
        $this->assertSame($expected, $actual);
    } finally {
        // Restore the superglobal even when an assertion fails.
        if ($hadUserAgent) {
            $_SERVER['HTTP_USER_AGENT'] = $previousUserAgent;
        } else {
            unset($_SERVER['HTTP_USER_AGENT']);
        }
    }
}

/**
 * Data sets for `test_getUrlTrackAIBot()`.
 *
 * Each row is `[httpStatus, responseSizeBytes, serverTimeMs, source, expected URL]`. The
 * expected URL is written in its normalised form, as produced by `normalizeTrackingUrl()`.
 *
 * Declared static so the provider can be called without a test case instance, which
 * newer PHPUnit versions require.
 *
 * @return array<string, array{0: ?int, 1: ?int, 2: ?int, 3: ?string, 4: string}>
 */
public static function getTestDataForGetUrlTrackAIBot(): array
{
    return [
        'all optional values supplied' => [
            200,
            34567,
            123,
            'wordpress',
            'http://mymatomo.com/matomo.php?idsite=1&rec=1&apiv=1&r=&r=&cid=abcdef01234517ab&url=https%3A%2F%2Fexample.com%2Fpage&urlref=&recMode=1&http_status=200&bw_bytes=34567&pf_srv=123&source=wordpress',
        ],

        'only response size and a source that needs encoding' => [
            null,
            34567,
            null,
            'something else',
            'http://mymatomo.com/matomo.php?idsite=1&rec=1&apiv=1&r=&r=&cid=abcdef01234517ab&url=https%3A%2F%2Fexample.com%2Fpage&urlref=&recMode=1&bw_bytes=34567&source=something%20else',
        ],

        'no optional values' => [
            null,
            null,
            null,
            null,
            'http://mymatomo.com/matomo.php?idsite=1&rec=1&apiv=1&r=&r=&cid=abcdef01234517ab&url=https%3A%2F%2Fexample.com%2Fpage&urlref=&recMode=1',
        ],
    ];
}

/**
 * Blanks out query parameters whose values differ between runs, so that tracking URLs
 * can be compared against fixed expectations.
 *
 * Every `&r=<value>` and `&_idts=<value>` occurrence is rewritten to the literal `&r=`.
 * The parameter name is not preserved, so a URL carrying both parameters ends up
 * containing `&r=&r=`.
 *
 * @param string $url the tracking URL to normalise.
 * @return string|null the normalised URL (whatever `preg_replace()` returns).
 */
private function normalizeTrackingUrl(string $url)
{
    $patterns = array_map(
        static function (string $param): string {
            return '/&' . preg_quote($param) . '=[^&]+/';
        },
        ['r', '_idts']
    );

    // With an array of patterns, preg_replace() applies them one after another, in order.
    return preg_replace($patterns, '&r=', $url);
}

public function testUsageApiUrl(): void
{
$newApiUrl = 'https://NEW-API-URL.com';
Expand Down