Skip to content

Commit

Permalink
APIv2: and/or/not support (#4480)
Browse files Browse the repository at this point in the history
* First approximation of AND/OR/NOT support

Broken by this:
- Goal filtering
- Table deciding
- Imports

* TableDecider handle nesting

* Query.remove_top_level_filters

* Plausible.Stats.Imported.SQL.Expression

* Handle AND/OR/NOT with imported data, create Plausible.Stats.Imported.SQL.WhereBuilder

* Add parser validations for event:goal, event:hostname and event:props:x filters top level constraints

* Move module around

* Query.get_filter -> Filters.filtering_on_dimension? in some callsites

* Filters.get_toplevel_filter

* TableDecider.sessions_join_events?, remove old method

* Transforming filters in query_optimizer

* Query API tests for and/or/not

* Reorder parser steps

* Post-merge test fixups

* Solve merge issue

* Simplify filtering_on_dimension?

* Update transformer code

* dimensions_used_in_filters min_depth option, simplify parser validations

* rename_dimensions_used_in_filter

* fix rename_dimensions_used_in_filter

* Rename a test
  • Loading branch information
macobo authored Sep 4, 2024
1 parent 3310006 commit 8fa3a83
Show file tree
Hide file tree
Showing 26 changed files with 683 additions and 188 deletions.
4 changes: 2 additions & 2 deletions extra/lib/plausible/stats/goal/revenue.ex
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ defmodule Plausible.Stats.Goal.Revenue do
"""
import Ecto.Query

alias Plausible.Stats.Query
alias Plausible.Stats.Filters

@revenue_metrics [:average_revenue, :total_revenue]

Expand All @@ -25,7 +25,7 @@ defmodule Plausible.Stats.Goal.Revenue do
"""
def get_revenue_tracking_currency(site, query, metrics) do
goal_filters =
case Query.get_filter(query, "event:goal") do
case Filters.get_toplevel_filter(query, "event:goal") do
[:is, "event:goal", list] -> list
_ -> []
end
Expand Down
6 changes: 3 additions & 3 deletions lib/plausible/stats/aggregate.ex
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ defmodule Plausible.Stats.Aggregate do
use Plausible
import Plausible.Stats.Base
import Ecto.Query
alias Plausible.Stats.{Query, Util, SQL}
alias Plausible.Stats.{Query, Util, SQL, Filters}

def aggregate(site, query, metrics) do
{currency, metrics} =
Expand Down Expand Up @@ -49,7 +49,7 @@ defmodule Plausible.Stats.Aggregate do

defp aggregate_time_on_page(site, query) do
windowed_pages_q =
from e in base_event_query(site, Query.remove_filters(query, ["event:page"])),
from e in base_event_query(site, Query.remove_top_level_filters(query, ["event:page"])),
select: %{
next_timestamp: over(fragment("leadInFrame(?)", e.timestamp), :event_horizon),
next_pathname: over(fragment("leadInFrame(?)", e.pathname), :event_horizon),
Expand All @@ -65,7 +65,7 @@ defmodule Plausible.Stats.Aggregate do
]
]

event_page_filter = Query.get_filter(query, "event:page")
event_page_filter = Filters.get_toplevel_filter(query, "event:page")

timed_page_transitions_q =
from e in Ecto.Query.subquery(windowed_pages_q),
Expand Down
33 changes: 18 additions & 15 deletions lib/plausible/stats/breakdown.ex
Original file line number Diff line number Diff line change
Expand Up @@ -82,21 +82,24 @@ defmodule Plausible.Stats.Breakdown do
import Ecto.Query

windowed_pages_q =
from e in base_event_query(site, Query.remove_filters(query, ["event:page", "event:props"])),
select: %{
next_timestamp: over(fragment("leadInFrame(?)", e.timestamp), :event_horizon),
next_pathname: over(fragment("leadInFrame(?)", e.pathname), :event_horizon),
timestamp: e.timestamp,
pathname: e.pathname,
session_id: e.session_id
},
windows: [
event_horizon: [
partition_by: e.session_id,
order_by: e.timestamp,
frame: fragment("ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING")
]
]
from e in base_event_query(
site,
Query.remove_top_level_filters(query, ["event:page", "event:props"])
),
select: %{
next_timestamp: over(fragment("leadInFrame(?)", e.timestamp), :event_horizon),
next_pathname: over(fragment("leadInFrame(?)", e.pathname), :event_horizon),
timestamp: e.timestamp,
pathname: e.pathname,
session_id: e.session_id
},
windows: [
event_horizon: [
partition_by: e.session_id,
order_by: e.timestamp,
frame: fragment("ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING")
]
]

timed_page_transitions_q =
from e in subquery(windowed_pages_q),
Expand Down
4 changes: 2 additions & 2 deletions lib/plausible/stats/custom_props.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ defmodule Plausible.Stats.CustomProps do
Module for querying user defined 'custom properties'.
"""

alias Plausible.Stats.Query
alias Plausible.Stats.Filters
use Plausible.ClickhouseRepo
import Plausible.Stats.Base

def fetch_prop_names(site, query) do
case Query.get_filter_by_prefix(query, "event:props:") do
case Filters.get_toplevel_filter(query, "event:props:") do
[_op, "event:props:" <> key | _rest] ->
[key]

Expand Down
5 changes: 3 additions & 2 deletions lib/plausible/stats/filter_suggestions.ex
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ defmodule Plausible.Stats.FilterSuggestions do

alias Plausible.Stats.Query
alias Plausible.Stats.Imported
alias Plausible.Stats.Filters

def filter_suggestions(site, query, "country", filter_search) do
matches = Location.search_country(filter_search)
Expand Down Expand Up @@ -187,10 +188,10 @@ defmodule Plausible.Stats.FilterSuggestions do
def filter_suggestions(site, query, "prop_value", filter_search) do
filter_query = if filter_search == nil, do: "%", else: "%#{filter_search}%"

[_op, "event:props:" <> key | _rest] = Query.get_filter_by_prefix(query, "event:props")
[_op, "event:props:" <> key | _rest] = Filters.get_toplevel_filter(query, "event:props")

none_q =
from(e in base_event_query(site, Query.remove_filters(query, ["event:props"])),
from(e in base_event_query(site, Query.remove_top_level_filters(query, ["event:props"])),
select: "(none)",
where: not has_key(e, :meta, ^key),
limit: 1
Expand Down
85 changes: 85 additions & 0 deletions lib/plausible/stats/filters/filters.ex
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,89 @@ defmodule Plausible.Stats.Filters do
|> List.last()
|> String.to_existing_atom()
end

def dimensions_used_in_filters(filters, opts \\ []) do
min_depth = Keyword.get(opts, :min_depth, 0)

filters
|> traverse()
|> Enum.filter(fn {_filter, _root, depth} -> depth >= min_depth end)
|> Enum.map(fn {[_operator, dimension | _rest], _root, _depth} -> dimension end)
end

def filtering_on_dimension?(query, dimension) do
dimension in dimensions_used_in_filters(query.filters)
end

@doc """
Gets the first top level filter with matching dimension (or nil).
Only use in cases where it's known that filters are only set on the top level as it
does not handle AND/OR/NOT!
"""
def get_toplevel_filter(query, prefix) do
Enum.find(query.filters, fn [_op, dimension | _rest] ->
String.starts_with?(dimension, prefix)
end)
end

def rename_dimensions_used_in_filter(filters, renames) do
transform_filters(filters, fn
[operation, dimension, clauses] ->
[[operation, Map.get(renames, dimension, dimension), clauses]]

_subtree ->
nil
end)
end

@doc """
Updates filters via `transformer`.
Transformer will receive each node (filter, and/or/not subtree) of
query and must return a list of nodes to replace it with or nil
to ignore and look deeper.
"""
def transform_filters(filters, transformer) do
filters
|> Enum.flat_map(&transform_tree(&1, transformer))
end

defp transform_tree(filter, transformer) do
case {transformer.(filter), filter} do
# Transformer did not return that value - transform that subtree
{nil, [:not, child_filter]} ->
[[:not, transform_tree(child_filter, transformer)]]

{nil, [operation, filters]} when operation in [:and, :or] ->
[[operation, transform_filters(filters, transformer)]]

# Reached a leaf node, return existing value
{nil, filter} ->
[[filter]]

# Transformer returned a value - don't transform that subtree
{transformed_filters, _filter} ->
transformed_filters
end
end

defp traverse(filters, root \\ nil, depth \\ -1) do
filters
|> Enum.flat_map(&traverse_tree(&1, root || &1, depth + 1))
end

defp traverse_tree(filter, root, depth) do
case filter do
[:not, child_filter] ->
traverse_tree(child_filter, root, depth + 1)

[operation, filters] when operation in [:and, :or] ->
traverse(filters, root || filter, depth + 1)

# Leaf node
_ ->
[{filter, root, depth}]
end
end
end
62 changes: 54 additions & 8 deletions lib/plausible/stats/filters/query_parser.ex
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
defmodule Plausible.Stats.Filters.QueryParser do
@moduledoc false

alias Plausible.Stats.{TableDecider, Filters, Query, Metrics, DateTimeRange, JSONSchema}
alias Plausible.Stats.{TableDecider, Filters, Metrics, DateTimeRange, JSONSchema}

@default_include %{
imports: false,
Expand Down Expand Up @@ -36,8 +36,10 @@ defmodule Plausible.Stats.Filters.QueryParser do
include: include
},
:ok <- validate_order_by(query),
:ok <- validate_goal_filters(query),
:ok <- validate_custom_props_access(site, query),
:ok <- validate_toplevel_only_filter_dimension(query),
:ok <- validate_special_metrics_filters(query),
:ok <- validate_filtered_goals_exist(query),
:ok <- validate_metrics(query),
:ok <- validate_include(query) do
{:ok, query}
Expand All @@ -63,9 +65,9 @@ defmodule Plausible.Stats.Filters.QueryParser do

defp parse_filter(filter) do
with {:ok, operator} <- parse_operator(filter),
{:ok, filter_key} <- parse_filter_key(filter),
{:ok, second} <- parse_filter_second(operator, filter),
{:ok, rest} <- parse_filter_rest(operator, filter) do
{:ok, [operator, filter_key | rest]}
{:ok, [operator, second | rest]}
end
end

Expand All @@ -75,8 +77,18 @@ defmodule Plausible.Stats.Filters.QueryParser do
defp parse_operator(["does_not_match" | _rest]), do: {:ok, :does_not_match}
defp parse_operator(["contains" | _rest]), do: {:ok, :contains}
defp parse_operator(["does_not_contain" | _rest]), do: {:ok, :does_not_contain}
defp parse_operator(["not" | _rest]), do: {:ok, :not}
defp parse_operator(["and" | _rest]), do: {:ok, :and}
defp parse_operator(["or" | _rest]), do: {:ok, :or}
defp parse_operator(filter), do: {:error, "Unknown operator for filter '#{i(filter)}'."}

def parse_filter_second(:not, [_, filter | _rest]), do: parse_filter(filter)

def parse_filter_second(operator, [_, filters | _rest]) when operator in [:and, :or],
do: parse_filters(filters)

def parse_filter_second(_operator, filter), do: parse_filter_key(filter)

defp parse_filter_key([_operator, filter_key | _rest] = filter) do
parse_filter_key_string(filter_key, "Invalid filter '#{i(filter)}")
end
Expand All @@ -87,6 +99,10 @@ defmodule Plausible.Stats.Filters.QueryParser do
when operator in [:is, :is_not, :matches, :does_not_match, :contains, :does_not_contain],
do: parse_clauses_list(filter)

defp parse_filter_rest(operator, _filter)
when operator in [:not, :and, :or],
do: {:ok, []}

defp parse_clauses_list([operation, filter_key, list] = filter) when is_list(list) do
all_strings? = Enum.all?(list, &is_binary/1)
all_integers? = Enum.all?(list, &is_integer/1)
Expand Down Expand Up @@ -359,7 +375,37 @@ defmodule Plausible.Stats.Filters.QueryParser do
end
end

defp validate_goal_filters(query) do
@only_toplevel ["event:goal", "event:hostname"]
defp validate_toplevel_only_filter_dimension(query) do
not_toplevel = Filters.dimensions_used_in_filters(query.filters, min_depth: 1)

if Enum.any?(not_toplevel, &(&1 in @only_toplevel)) do
{:error,
"Invalid filters. Dimension `#{List.first(not_toplevel)}` can only be filtered at the top level."}
else
:ok
end
end

@special_metrics [:conversion_rate, :group_conversion_rate]
defp validate_special_metrics_filters(query) do
special_metric? = Enum.any?(@special_metrics, &(&1 in query.metrics))

deep_custom_property? =
query.filters
|> Filters.dimensions_used_in_filters(min_depth: 1)
|> Enum.any?(fn dimension -> String.starts_with?(dimension, "event:props:") end)

if special_metric? and deep_custom_property? do
{:error,
"Invalid filters. When `conversion_rate` or `group_conversion_rate` metrics are used, custom property filters can only be used on top level."}
else
:ok
end
end

defp validate_filtered_goals_exist(query) do
# Note: Only works since event:goal is allowed as a top level filter
goal_filter_clauses =
Enum.flat_map(query.filters, fn
[:is, "event:goal", clauses] -> clauses
Expand Down Expand Up @@ -396,7 +442,7 @@ defmodule Plausible.Stats.Filters.QueryParser do
defp validate_custom_props_access(_site, query, allowed_props) do
valid? =
query.filters
|> Enum.map(fn [_operation, filter_key | _rest] -> filter_key end)
|> Filters.dimensions_used_in_filters()
|> Enum.concat(query.dimensions)
|> Enum.all?(fn
"event:props:" <> prop -> prop in allowed_props
Expand All @@ -418,7 +464,7 @@ defmodule Plausible.Stats.Filters.QueryParser do

defp validate_metric(metric, query) when metric in [:conversion_rate, :group_conversion_rate] do
if Enum.member?(query.dimensions, "event:goal") or
not is_nil(Query.get_filter(query, "event:goal")) do
Filters.filtering_on_dimension?(query, "event:goal") do
:ok
else
{:error, "Metric `#{metric}` can only be queried with event:goal filters or dimensions."}
Expand All @@ -427,7 +473,7 @@ defmodule Plausible.Stats.Filters.QueryParser do

defp validate_metric(:views_per_visit = metric, query) do
cond do
not is_nil(Query.get_filter(query, "event:page")) ->
Filters.filtering_on_dimension?(query, "event:page") ->
{:error, "Metric `#{metric}` cannot be queried with a filter on `event:page`."}

length(query.dimensions) > 0 ->
Expand Down
Loading

0 comments on commit 8fa3a83

Please sign in to comment.