Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Perf: Improve performance of top stats query with long time windows #4857

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ All notable changes to this project will be documented in this file.

### Removed
### Changed

- Improved performance of top stats query with longer time windows

### Fixed

- Fix returning filter suggestions for multiple custom property values in the dashboard Filter modal
Expand Down
23 changes: 17 additions & 6 deletions lib/plausible/stats/table_decider.ex
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,6 @@ defmodule Plausible.Stats.TableDecider do
end
end

# Note: This is inaccurate when filtering but required for old backwards compatibility
defp metric_partitioner(%Query{legacy_breakdown: true}, :pageviews), do: :either
defp metric_partitioner(%Query{legacy_breakdown: true}, :events), do: :either

defp metric_partitioner(_, :conversion_rate), do: :either
defp metric_partitioner(_, :group_conversion_rate), do: :either
defp metric_partitioner(_, :visitors), do: :either
Expand All @@ -75,12 +71,24 @@ defmodule Plausible.Stats.TableDecider do
defp metric_partitioner(_, :average_revenue), do: :event
defp metric_partitioner(_, :total_revenue), do: :event
defp metric_partitioner(_, :scroll_depth), do: :event
defp metric_partitioner(_, :pageviews), do: :event
defp metric_partitioner(_, :events), do: :event
defp metric_partitioner(_, :bounce_rate), do: :session
defp metric_partitioner(_, :visit_duration), do: :session
defp metric_partitioner(_, :views_per_visit), do: :session

defp metric_partitioner(query, metric) when metric in [:pageviews, :events] do
query_days = DateTime.diff(query.utc_time_range.last, query.utc_time_range.first, :day)

cond do
# Note: This is inaccurate when filtering but required for old backwards compatibility
legacy_breakdown?(query) -> :either
# When querying large time window aggregates, session-level event/pageview counts
# are accurate enough: the amount of sessions starting/ending outside of time window
# doesn't matter as much.
query_days > 5 and empty?(query.filters) and empty?(query.dimensions) -> :either
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This condition would be true for some periods on virgin dashboards (of users that have just started using Plausible). When the counts are in the single digits, the count changing when cycling through various periods may be mistaken for a bug.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It may be an acceptable tradeoff, but maybe it's possible to describe a condition where the differences in counts really don't matter? For whom is the events table query most expensive actually?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discussed this live.

A problem with this approach is that when user looks at montly view and sees that $DAY had 9 visitors and after clicking on $DAY sees they had 8 visitors, this might trigger some WTFs from our users.

Arturs suggestion was to change this logic to only not read from sessions in realtime view.

I think this is valid, but want to ponder this a bit more and figure out the implications. Marking as a draft for now.

true -> :event
end
end

# Calculated metrics - handled on callsite separately from other metrics.
defp metric_partitioner(_, :time_on_page), do: :other
defp metric_partitioner(_, :total_visitors), do: :other
Expand All @@ -97,6 +105,9 @@ defmodule Plausible.Stats.TableDecider do

defp dimension_partitioner(_, _), do: :either

defp legacy_breakdown?(%Query{} = query), do: query.legacy_breakdown
defp legacy_breakdown?(query) when is_map(query), do: query[:legacy_breakdown]
apata marked this conversation as resolved.
Show resolved Hide resolved

@default %{event: [], session: [], either: [], other: [], sample_percent: []}
defp partition(values, query, partitioner) do
Enum.reduce(values, @default, fn value, acc ->
Expand Down
42 changes: 36 additions & 6 deletions test/plausible/stats/table_decider_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ defmodule Plausible.Stats.TableDeciderTest do
assert partition_metrics([:total_revenue, :visitors], query) ==
{[:total_revenue, :visitors], [], []}

assert partition_metrics([:pageviews, :visits], query) == {[:pageviews, :visits], [], []}
assert partition_metrics([:total_revenue, :visits], query) ==
{[:total_revenue, :visits], [], []}
end

test "metrics that can be calculated on either when session-only metrics" do
Expand Down Expand Up @@ -150,13 +151,42 @@ defmodule Plausible.Stats.TableDeciderTest do
assert partition_metrics([:visitors], make_query([], ["visit:exit_page"])) ==
{[], [:visitors], []}
end

test "pageviews and events metrics are event-only when short time window" do
query = make_query([], [], %{"period" => "day"})

assert partition_metrics([:pageviews, :events, :bounce_rate], query) ==
{[:pageviews, :events], [:bounce_rate], []}
end

test "pageviews and events metrics can be calculated via either table with longer time windows" do
query = make_query([], [], %{"period" => "7d"})

assert partition_metrics([:pageviews, :events, :bounce_rate], query) ==
{[], [:bounce_rate, :pageviews, :events], []}
end

test "pageviews and events metrics are event-only when filters or dimensions" do
query1 = make_query(["visit:exit_page"], [], %{"period" => "7d"})
query2 = make_query([], ["visit:exit_page"], %{"period" => "7d"})

assert partition_metrics([:pageviews, :events, :bounce_rate], query1) ==
{[:pageviews, :events], [:bounce_rate], []}

assert partition_metrics([:pageviews, :events, :bounce_rate], query2) ==
{[:pageviews, :events], [:bounce_rate], []}
end
end

defp make_query(filter_keys, dimensions \\ []) do
Query.from(build(:site), %{
"filters" => Enum.map(filter_keys, fn filter_key -> ["is", filter_key, []] end),
"dimensions" => dimensions
})
defp make_query(filter_keys, dimensions \\ [], extra \\ %{}) do
params =
%{
"filters" => Enum.map(filter_keys, fn filter_key -> ["is", filter_key, []] end),
"dimensions" => dimensions
}
|> Map.merge(extra)

Query.from(build(:site), params)
end

defp make_query_full_filters(filters) do
Expand Down
Loading