From cdd49481c2c1326a8a03f28d9cc716e43816bd10 Mon Sep 17 00:00:00 2001 From: Peter Boehlke Date: Tue, 14 Oct 2025 17:48:56 +0200 Subject: [PATCH 1/2] Refs #23342 Try to fix the Live API to: - handle the offset correctly - only count necessary data from offset (memory efficient) - no longer create overlapping timeframes when splitting dates into multiple queries --- plugins/Live/Model.php | 186 +++++++++++++++++++++++++++-------------- 1 file changed, 125 insertions(+), 61 deletions(-) diff --git a/plugins/Live/Model.php b/plugins/Live/Model.php index 1ca45cc5802..f5b585a2cdb 100644 --- a/plugins/Live/Model.php +++ b/plugins/Live/Model.php @@ -53,8 +53,7 @@ public function queryLogVisits($idSite, $period, $date, $segment, $offset, $limi } // If no other filter, only look at the last 24 hours of stats - if ( - empty($visitorId) + if (empty($visitorId) && empty($limit) && empty($offset) && empty($period) @@ -69,39 +68,43 @@ public function queryLogVisits($idSite, $period, $date, $segment, $offset, $limi $queries = $this->splitDatesIntoMultipleQueries($dateStart, $dateEnd, $limit, $offset, $filterSortOrder); $foundVisits = array(); + $remainingOffset = $offset; foreach ($queries as $queryRange) { + // Calculate limit based on already found visits $updatedLimit = $limit; if (!empty($limit) && (int)$limit > -1) { $updatedLimit = $limit - count($foundVisits); - } - $updatedOffset = $offset; - if (!empty($offset) && !empty($foundVisits)) { - $updatedOffset = 0; // we've already skipped enough rows + if ($updatedLimit <= 0) { + break; // Found enough visits + } } - [$sql, $bind] = $this->makeLogVisitsQueryString($idSite, $queryRange[0], $queryRange[1], $segment, $updatedOffset, $updatedLimit, $visitorId, $minTimestamp, $filterSortOrder); + // Use the remaining offset for this query + [$sql, $bind] = $this->makeLogVisitsQueryString($idSite, $queryRange[0], $queryRange[1], $segment, $remainingOffset, $updatedLimit, $visitorId, $minTimestamp, $filterSortOrder); $visits = 
$this->executeLogVisitsQuery($sql, $bind, $segment, $dateStart, $dateEnd, $minTimestamp, $limit); - if (!empty($offset) && empty($visits)) { - // find out if there are any matches - $updatedOffset = 0; - [$sql, $bind] = $this->makeLogVisitsQueryString($idSite, $queryRange[0], $queryRange[1], $segment, $updatedOffset, $updatedLimit, $visitorId, $minTimestamp, $filterSortOrder); + if (!empty($remainingOffset) && empty($visits)) { + // No results with offset - count total visits in this time range efficiently + $totalInRange = $this->countLogVisitsInRange($idSite, $queryRange[0], $queryRange[1], $segment, $visitorId, $minTimestamp, $filterSortOrder); - $visits = $this->executeLogVisitsQuery($sql, $bind, $segment, $dateStart, $dateEnd, $minTimestamp, $limit); - if (!empty($visits)) { - // found out the number of visits that we skipped in this query - $offset = $offset - count($visits); - } + // Adjust offset for next queries + $remainingOffset = max(0, $remainingOffset - $totalInRange); + + // Continue to next query continue; } if (!empty($visits)) { $foundVisits = array_merge($foundVisits, $visits); + + // After first successful find, offset is fulfilled + $remainingOffset = 0; } + // Check if enough visits have been found if ($limit > 0 && count($foundVisits) >= $limit) { if (count($foundVisits) > $limit) { $foundVisits = array_slice($foundVisits, 0, $limit); @@ -122,6 +125,54 @@ public function queryLogVisits($idSite, $period, $date, $segment, $offset, $limi return $foundVisits; } + /** + * Count visits in a time range without loading all data into memory + * Uses SQL COUNT(*) for efficiency + * + * @param int|array $idSite + * @param Date $dateStart + * @param Date $dateEnd + * @param string $segment + * @param string $visitorId + * @param int $minTimestamp + * @param string $filterSortOrder + * @return int + * @throws Exception + */ + private function countLogVisitsInRange($idSite, $dateStart, $dateEnd, $segment, $visitorId, $minTimestamp, $filterSortOrder) + { + 
[$whereClause, $bindIdSites] = $this->getIdSitesWhereClause($idSite); + [$whereBind, $where] = $this->getWhereClauseAndBind($whereClause, $bindIdSites, $dateStart, $dateEnd, $visitorId, $minTimestamp); + + $segment = new Segment($segment, $idSite, $dateStart, $dateEnd); + + // Use COUNT(*), do not load all data + $select = "COUNT(*) as count"; + $from = "log_visit"; + + if ($segment->isEmpty()) { + $groupBy = false; + } else { + // When segment is used, we need to count distinct visits + $select = "COUNT(DISTINCT log_visit.idvisit) as count"; + $groupBy = false; // No GROUP BY needed when using COUNT(DISTINCT) + } + + $query = $segment->getSelectQuery($select, $from, $where, $whereBind, $orderBy = '', $groupBy); + + $query['sql'] = DbHelper::addMaxExecutionTimeHintToQuery($query['sql'], $this->getLiveQueryMaxExecutionTime()); + + $readerDb = Db::getReader(); + try { + $result = $readerDb->fetchOne($query['sql'], $query['bind']); + } catch (Exception $e) { + $this->handleMaxExecutionTimeError($readerDb, $e, $segment->getOriginalString(), $dateStart, $dateEnd, $minTimestamp, 0, $query); + throw $e; + } + + return (int)$result; + } + /** * Return the most recent date time of any visit for the given idSite * If period / date are provided the method return the most recent date time within that period @@ -262,7 +313,7 @@ public function splitDatesIntoMultipleQueries($dateStart, $dateEnd, $limit, $off { $virtualDateEnd = $dateEnd; if (empty($dateEnd)) { - $virtualDateEnd = Date::now()->addDay(1); // matomo always adds one day for some reason + $virtualDateEnd = Date::now()->addDay(1); } $virtualDateStart = $dateStart; @@ -272,72 +323,85 @@ public function splitDatesIntoMultipleQueries($dateStart, $dateEnd, $limit, $off $queries = []; $hasStartEndDateMoreThanOneDayInBetween = $virtualDateStart && $virtualDateStart->addDay(1)->isEarlier($virtualDateEnd); - if ( - $limit - && $hasStartEndDateMoreThanOneDayInBetween - ) { - if (strtolower($filterSortOrder) !== 'asc') { - 
$virtualDateEnd = $virtualDateEnd->subDay(1); - $queries[] = [$virtualDateEnd, $dateEnd]; // need to use ",endDate" in case endDate is not set - if ($virtualDateStart->addDay(7)->isEarlier($virtualDateEnd)) { - $queries[] = [$virtualDateEnd->subDay(7), $virtualDateEnd->subSeconds(1)]; - $virtualDateEnd = $virtualDateEnd->subDay(7); + if ($limit && $hasStartEndDateMoreThanOneDayInBetween) { + if (strtolower($filterSortOrder) !== 'asc') { + // DESC: From newest to oldest + $currentEnd = $virtualDateEnd; + + // First query: last day + $blockStart = $currentEnd->subDay(1); + $queries[] = [$blockStart, $dateEnd]; + $currentEnd = $blockStart; + + // 7-day block - only if enough space + if ($virtualDateStart->addDay(7)->isEarlier($currentEnd)) { + $blockStart = $currentEnd->subDay(7); + $queries[] = [$blockStart, $currentEnd->subSeconds(1)]; + $currentEnd = $blockStart; } if (!$offset) { - // only when no offset - // we would in worst case - if not enough visits are found to bypass the offset - execute below queries too often. - // like we would need to execute each of the queries twice just to find out if there are some visits that - // need to be skipped... 
- - if ($virtualDateStart->addDay(30)->isEarlier($virtualDateEnd)) { - $queries[] = [$virtualDateEnd->subDay(30), $virtualDateEnd->subSeconds(1)]; - $virtualDateEnd = $virtualDateEnd->subDay(30); + // 30-day block - only if enough space + if ($virtualDateStart->addDay(30)->isEarlier($currentEnd)) { + $blockStart = $currentEnd->subDay(30); + $queries[] = [$blockStart, $currentEnd->subSeconds(1)]; + $currentEnd = $blockStart; } - if ($virtualDateStart->addPeriod(1, 'year')->isEarlier($virtualDateEnd)) { - $queries[] = [$virtualDateEnd->subYear(1), $virtualDateEnd->subSeconds(1)]; - $virtualDateEnd = $virtualDateEnd->subYear(1); + + // 1-year block - only if enough space + if ($virtualDateStart->addPeriod(1, 'year')->isEarlier($currentEnd)) { + $blockStart = $currentEnd->subYear(1); + $queries[] = [$blockStart, $currentEnd->subSeconds(1)]; + $currentEnd = $blockStart; } } - if ($virtualDateStart->isEarlier($virtualDateEnd)) { - // need to use ",endDate" in case startDate is not set in which case we do not want to have any limit - $queries[] = [$dateStart, $virtualDateEnd->subSeconds(1)]; + // Rest + if ($virtualDateStart->isEarlier($currentEnd)) { + $queries[] = [$dateStart, $currentEnd->subSeconds(1)]; } } else { - $queries[] = [$virtualDateStart, $virtualDateStart->addDay(1)->subSeconds(1)]; - $virtualDateStart = $virtualDateStart->addDay(1); - - if ($virtualDateStart->addDay(7)->isEarlier($virtualDateEnd)) { - $queries[] = [$virtualDateStart, $virtualDateStart->addDay(7)->subSeconds(1)]; - $virtualDateStart = $virtualDateStart->addDay(7); + // ASC: From oldest to newest + $currentStart = $virtualDateStart; + + // First query: first day + $blockEnd = $currentStart->addDay(1); + $queries[] = [$currentStart, $blockEnd->subSeconds(1)]; + $currentStart = $blockEnd; + + // 7-day block - only if enough space + if ($currentStart->addDay(7)->isEarlier($virtualDateEnd)) { + $blockEnd = $currentStart->addDay(7); + $queries[] = [$currentStart, $blockEnd->subSeconds(1)]; + 
$currentStart = $blockEnd; } if (!$offset) { - // only when no offset - // we would in worst case - if not enough visits are found to bypass the offset - execute below queries too often. - // like we would need to execute each of the queries twice just to find out if there are some visits that - // need to be skipped... - - if ($virtualDateStart->addDay(30)->isEarlier($virtualDateEnd)) { - $queries[] = [$virtualDateStart, $virtualDateStart->addDay(30)->subSeconds(1)]; - $virtualDateStart = $virtualDateStart->addDay(30); + // 30-day block - only if enough space + if ($currentStart->addDay(30)->isEarlier($virtualDateEnd)) { + $blockEnd = $currentStart->addDay(30); + $queries[] = [$currentStart, $blockEnd->subSeconds(1)]; + $currentStart = $blockEnd; } - if ($virtualDateStart->addPeriod(1, 'year')->isEarlier($virtualDateEnd)) { - $queries[] = [$virtualDateStart, $virtualDateStart->addPeriod(1, 'year')->subSeconds(1)]; - $virtualDateStart = $virtualDateStart->addPeriod(1, 'year'); + + // 1-year block - only if enough space + if ($currentStart->addPeriod(1, 'year')->isEarlier($virtualDateEnd)) { + $blockEnd = $currentStart->addPeriod(1, 'year'); + $queries[] = [$currentStart, $blockEnd->subSeconds(1)]; + $currentStart = $blockEnd; } } - if ($virtualDateStart->isEarlier($virtualDateEnd)) { - // need to use ",endDate" in case startDate is not set in which case we do not want to have any limit - $queries[] = [$virtualDateStart, $dateEnd]; + // Rest + if ($currentStart->isEarlier($virtualDateEnd)) { + $queries[] = [$currentStart, $dateEnd]; } } } else { $queries[] = array($dateStart, $dateEnd); } + return $queries; } From 3112f801fde4dc7085b010db2944f72b642fa3ab Mon Sep 17 00:00:00 2001 From: Peter Boehlke Date: Tue, 14 Oct 2025 18:12:58 +0200 Subject: [PATCH 2/2] More offset corrections --- plugins/Live/Model.php | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/plugins/Live/Model.php b/plugins/Live/Model.php index 
f5b585a2cdb..77caa738a95 100644 --- a/plugins/Live/Model.php +++ b/plugins/Live/Model.php @@ -71,40 +71,34 @@ public function queryLogVisits($idSite, $period, $date, $segment, $offset, $limi $remainingOffset = $offset; foreach ($queries as $queryRange) { - // Calculate limit based on already found visits $updatedLimit = $limit; if (!empty($limit) && (int)$limit > -1) { $updatedLimit = $limit - count($foundVisits); - if ($updatedLimit <= 0) { - break; // Found enough visits + break; } } - // Use the remaining offset for this query [$sql, $bind] = $this->makeLogVisitsQueryString($idSite, $queryRange[0], $queryRange[1], $segment, $remainingOffset, $updatedLimit, $visitorId, $minTimestamp, $filterSortOrder); - $visits = $this->executeLogVisitsQuery($sql, $bind, $segment, $dateStart, $dateEnd, $minTimestamp, $limit); - if (!empty($remainingOffset) && empty($visits)) { - // No results with offset - count total visits in this time range efficiently - $totalInRange = $this->countLogVisitsInRange($idSite, $queryRange[0], $queryRange[1], $segment, $visitorId, $minTimestamp, $filterSortOrder); - - // Adjust offset for next queries - $remainingOffset = max(0, $remainingOffset - $totalInRange); - - // Continue to next query - continue; + if (!empty($remainingOffset)) { + if (empty($visits)) { + // No visits returned - need to count total in range to adjust offset + $totalInRange = $this->countLogVisitsInRange($idSite, $queryRange[0], $queryRange[1], $segment, $visitorId, $minTimestamp, $filterSortOrder); + $remainingOffset = max(0, $remainingOffset - $totalInRange); + continue; + } else { + // Visits returned - these are already AFTER the offset was applied by SQL + // So the offset is now fulfilled + $remainingOffset = 0; + } } if (!empty($visits)) { $foundVisits = array_merge($foundVisits, $visits); - - // After first successful find, offset is fulfilled - $remainingOffset = 0; } - // Check if enough visits have been found if ($limit > 0 && count($foundVisits) >= $limit) { if 
(count($foundVisits) > $limit) { $foundVisits = array_slice($foundVisits, 0, $limit);