-
-
Notifications
You must be signed in to change notification settings - Fork 4.5k
feat(issues): Include empty tags in group tags endpoint #102049
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
373a6b5
167108a
a43ed89
d8a8df1
69076fe
d431559
5f6506e
e56dcc8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,7 +5,7 @@ | |
| from rest_framework.request import Request | ||
| from rest_framework.response import Response | ||
|
|
||
| from sentry import tagstore | ||
| from sentry import features, tagstore | ||
| from sentry.api.api_publish_status import ApiPublishStatus | ||
| from sentry.api.base import region_silo_endpoint | ||
| from sentry.api.helpers.environments import get_environments | ||
|
|
@@ -62,12 +62,18 @@ def get(self, request: Request, group: Group) -> Response: | |
|
|
||
| environment_ids = [e.id for e in get_environments(request, group.project.organization)] | ||
|
|
||
| include_empty_values = features.has( | ||
| "organizations:issue-tags-include-empty-values", | ||
| organization=group.project.organization, | ||
|
||
| ) | ||
|
|
||
| tag_keys = backend.get_group_tag_keys_and_top_values( | ||
| group, | ||
| environment_ids, | ||
| keys=keys, | ||
| value_limit=value_limit, | ||
| tenant_ids={"organization_id": group.project.organization_id}, | ||
| include_empty_values=include_empty_values, | ||
| ) | ||
|
|
||
| data = serialize(tag_keys, request.user) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -684,6 +684,7 @@ def get_group_tag_keys_and_top_values( | |
| keys: list[str] | None = None, | ||
| value_limit: int = TOP_VALUES_DEFAULT_LIMIT, | ||
| tenant_ids=None, | ||
| include_empty_values=False, | ||
| **kwargs, | ||
| ): | ||
| # Similar to __get_tag_key_and_top_values except we get the top values | ||
|
|
@@ -726,7 +727,28 @@ def get_group_tag_keys_and_top_values( | |
| tenant_ids=tenant_ids, | ||
| ) | ||
|
|
||
| # Then supplement the key objects with the top values for each. | ||
| if include_empty_values: | ||
| keys_to_check = list(values_by_key.keys()) or [k.key for k in keys_with_counts] | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Bug: Tag Key Logic Fails with Empty ValuesThe |
||
| empty_stats_map = self.__get_empty_value_stats_map( | ||
| dataset=dataset, | ||
| filters=filters, | ||
| conditions=conditions, | ||
| keys_to_check=keys_to_check, | ||
| tenant_ids=tenant_ids, | ||
| start=kwargs.get("start"), | ||
| end=kwargs.get("end"), | ||
| ) | ||
| for k, stats in empty_stats_map.items(): | ||
| if not stats or stats.get("count", 0) <= 0: | ||
| continue | ||
| vals = values_by_key.get(k, {}) | ||
|
Comment on lines
+742
to
+744
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: from my understanding think these need to be |
||
| vals[""] = stats | ||
| # Re-sort after adding empty count and trim to value_limit | ||
| sorted_items = sorted(vals.items(), key=lambda kv: (-kv[1]["count"], str(kv[0])))[ | ||
| :value_limit | ||
| ] | ||
| values_by_key[k] = {vk: vd for vk, vd in sorted_items} | ||
|
|
||
| for keyobj in keys_with_counts: | ||
| key = keyobj.key | ||
| values = values_by_key.get(key, dict()) | ||
|
|
@@ -744,6 +766,61 @@ def get_group_tag_keys_and_top_values( | |
|
|
||
| return keys_with_counts | ||
|
|
||
| def __get_empty_value_stats_map( | ||
| self, | ||
| dataset: Dataset, | ||
| filters: dict[str, list[Any]], | ||
| conditions: list, | ||
| keys_to_check: list[str], | ||
| tenant_ids: dict[str, int | str] | None, | ||
| start, | ||
| end, | ||
| ) -> dict[str, dict[str, Any]]: | ||
| if not keys_to_check: | ||
| return {} | ||
|
|
||
| empty_alias_map: dict[str, dict[str, str]] = {} | ||
| selected_columns_empty: list[list] = [] | ||
| for i, k in enumerate(keys_to_check): | ||
| cnt_alias = f"empty_cnt_{i}" | ||
| min_alias = f"empty_min_{i}" | ||
| max_alias = f"empty_max_{i}" | ||
| empty_alias_map[k] = {"count": cnt_alias, "first": min_alias, "last": max_alias} | ||
| tag_expr = self.format_string.format(k) | ||
| empty_predicate = ["equals", [tag_expr, ""]] | ||
| selected_columns_empty.append(["countIf", [empty_predicate], cnt_alias]) | ||
| selected_columns_empty.append(["minIf", [SEEN_COLUMN, empty_predicate], min_alias]) | ||
| selected_columns_empty.append(["maxIf", [SEEN_COLUMN, empty_predicate], max_alias]) | ||
|
|
||
| empty_filters = dict(filters) | ||
| if self.key_column in empty_filters: | ||
| # For empty-value stats, do not restrict by tags_key; events that | ||
| # store an empty value may omit the key entirely. Removing this | ||
| # filter ensures those events are counted. | ||
| del empty_filters[self.key_column] | ||
|
|
||
| empty_results = snuba.query( | ||
| dataset=dataset, | ||
| start=start, | ||
| end=end, | ||
| groupby=None, | ||
| conditions=conditions, | ||
| filter_keys=empty_filters, | ||
| aggregations=[], | ||
| selected_columns=selected_columns_empty, | ||
| referrer="tagstore._get_tag_keys_and_top_values_empty_counts", | ||
| tenant_ids=tenant_ids, | ||
| ) | ||
|
Comment on lines
+802
to
+813
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are we going to end up making an extra query for every org to fetch these empty keys? |
||
| stats_map: dict[str, dict[str, Any]] = {} | ||
| for k in keys_to_check: | ||
| aliases = empty_alias_map[k] | ||
| stats_map[k] = { | ||
| "count": empty_results.get(aliases["count"], 0) or 0, | ||
scttcper marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| "first_seen": empty_results.get(aliases["first"]), | ||
| "last_seen": empty_results.get(aliases["last"]), | ||
| } | ||
scttcper marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| return stats_map | ||
|
|
||
| def get_release_tags(self, organization_id, project_ids, environment_id, versions): | ||
| filters = {"project_id": project_ids} | ||
| if environment_id: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -304,6 +304,80 @@ def test_device_class(self) -> None: | |
| assert top_values[1]["value"] == "low" | ||
| assert top_values[2]["value"] == "medium" | ||
|
|
||
| def test_include_empty_values_for_specific_key_with_feature(self) -> None: | ||
| event = self.store_event( | ||
| data={ | ||
| "fingerprint": ["group-empty-all"], | ||
| "tags": {}, | ||
| "timestamp": before_now(minutes=1).isoformat(), | ||
| }, | ||
| project_id=self.project.id, | ||
| assert_no_errors=False, | ||
| ) | ||
| self.store_event( | ||
| data={ | ||
| "fingerprint": ["group-empty-all"], | ||
| "tags": {"foo": "bar"}, | ||
| "timestamp": before_now(minutes=1).isoformat(), | ||
| }, | ||
| project_id=self.project.id, | ||
| ) | ||
|
|
||
| self.login_as(user=self.user) | ||
|
|
||
| url = f"/api/0/issues/{event.group.id}/tags/?key=foo" | ||
|
|
||
| # Off by default: empty should be excluded from topValues | ||
| response = self.client.get(url, format="json") | ||
| assert response.status_code == 200 | ||
| assert len(response.data) == 1 | ||
| values = {v["value"] for v in response.data[0]["topValues"]} | ||
| assert values == {"bar"} | ||
|
|
||
| # With feature: empty should be included | ||
| with self.feature({"organizations:issue-tags-include-empty-values": True}): | ||
| response = self.client.get(url, format="json") | ||
| assert response.status_code == 200 | ||
| values = {v["value"] for v in response.data[0]["topValues"]} | ||
| assert values == {"", "bar"} | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we also check that the empty tag count is correct (so response.data[0]["topValues"]["count"] is 1 or whatever) |
||
|
|
||
| def test_include_empty_values_in_all_tags_with_feature(self) -> None: | ||
| event = self.store_event( | ||
| data={ | ||
| "fingerprint": ["group-empty-all-tags"], | ||
| "tags": {"foo": ""}, | ||
| "timestamp": before_now(minutes=1).isoformat(), | ||
| }, | ||
| project_id=self.project.id, | ||
| assert_no_errors=False, | ||
| ) | ||
| self.store_event( | ||
| data={ | ||
| "fingerprint": ["group-empty-all-tags"], | ||
| "tags": {"foo": "bar"}, | ||
| "timestamp": before_now(minutes=1).isoformat(), | ||
| }, | ||
| project_id=self.project.id, | ||
| ) | ||
|
|
||
| self.login_as(user=self.user) | ||
| url = f"/api/0/issues/{event.group.id}/tags/?limit=10" | ||
|
|
||
| # Off by default: empty should be excluded from the All Tags list | ||
| response = self.client.get(url, format="json") | ||
| assert response.status_code == 200 | ||
| foo = next((t for t in response.data if t["key"] == "foo"), None) | ||
| assert foo is not None | ||
| assert {v["value"] for v in foo["topValues"]} == {"bar"} | ||
|
|
||
| # With feature: empty should be included in the All Tags list | ||
| with self.feature({"organizations:issue-tags-include-empty-values": True}): | ||
| response = self.client.get(url, format="json") | ||
| assert response.status_code == 200 | ||
| foo = next((t for t in response.data if t["key"] == "foo"), None) | ||
| assert foo is not None | ||
| assert {v["value"] for v in foo["topValues"]} == {"", "bar"} | ||
|
|
||
| def test_flags(self) -> None: | ||
| event1 = self.store_event( | ||
| data={ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you mentioned you'll need this in the frontend, will you want
api_expose=True?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think in the frontend i can just handle empty strings and let the backend decide when to send them the value is
""which doesn't exist today.