From 373a6b565fbee530b1d09275c020ebda714c97b4 Mon Sep 17 00:00:00 2001 From: Scott Cooper Date: Thu, 23 Oct 2025 14:38:16 -0700 Subject: [PATCH 1/6] feat(issues): Include empty tags in group tags endpoint Add support for including empty tag values in the group tags endpoint (api/0/issues/{id}/tags/) when the organizations:issue-tags-include-empty-values feature flag is enabled. Changes: - Add include_empty_values parameter to get_group_tag_keys_and_top_values methods - Add __get_empty_value_stats_map helper method to query empty tag stats - Update group_tags endpoint to check feature flag and pass parameter - Add feature flag organizations:issue-tags-include-empty-values - Add tests for empty tag inclusion behavior --- src/sentry/features/temporary.py | 2 + src/sentry/issues/endpoints/group_tags.py | 8 +- src/sentry/tagstore/base.py | 1 + src/sentry/tagstore/snuba/backend.py | 80 ++++++++++++++++++- .../issues/endpoints/test_group_tags.py | 74 +++++++++++++++++ 5 files changed, 163 insertions(+), 2 deletions(-) diff --git a/src/sentry/features/temporary.py b/src/sentry/features/temporary.py index d35a1c7b150bff..47bd6e1f841697 100644 --- a/src/sentry/features/temporary.py +++ b/src/sentry/features/temporary.py @@ -184,6 +184,8 @@ def register_temporary_features(manager: FeatureManager) -> None: manager.add("organizations:issue-detection-sort-spans", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) # Whether to allow issue only search on the issue list manager.add("organizations:issue-search-allow-postgres-only-search", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False) + # Include empty tag count in issue tag totals/values + manager.add("organizations:issue-tags-include-empty-values", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False) # Enable the new issue category mapping manager.add("organizations:issue-taxonomy", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) manager.add("organizations:metric-issue-poc", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) diff --git a/src/sentry/issues/endpoints/group_tags.py b/src/sentry/issues/endpoints/group_tags.py index 03919970416655..55d52a779a8d2a 100644 --- a/src/sentry/issues/endpoints/group_tags.py +++ b/src/sentry/issues/endpoints/group_tags.py @@ -5,7 +5,7 @@ from rest_framework.request import Request from rest_framework.response import Response -from sentry import tagstore +from sentry import features, tagstore from sentry.api.api_publish_status import ApiPublishStatus from sentry.api.base import region_silo_endpoint from sentry.api.helpers.environments import get_environments @@ -62,12 +62,18 @@ def get(self, request: Request, group: Group) -> Response: environment_ids = [e.id for e in get_environments(request, group.project.organization)] + include_empty_values = features.has( + "organizations:issue-tags-include-empty-values", + organization=group.project.organization, + ) + tag_keys = backend.get_group_tag_keys_and_top_values( group, environment_ids, keys=keys, value_limit=value_limit, tenant_ids={"organization_id": group.project.organization_id}, + include_empty_values=include_empty_values, ) data = serialize(tag_keys, request.user) diff --git a/src/sentry/tagstore/base.py b/src/sentry/tagstore/base.py index 5bedd5ed2e4ac3..5d0ad4898eb53b 100644 --- a/src/sentry/tagstore/base.py +++ b/src/sentry/tagstore/base.py @@ -301,6 +301,7 @@ def get_group_tag_keys_and_top_values( keys: list[str] | None = None, value_limit=TOP_VALUES_DEFAULT_LIMIT, tenant_ids=None, + include_empty_values: bool | None = None, **kwargs, ): diff --git a/src/sentry/tagstore/snuba/backend.py b/src/sentry/tagstore/snuba/backend.py index 6844199f3a437d..02d41bbdb81298 100644 --- a/src/sentry/tagstore/snuba/backend.py +++ b/src/sentry/tagstore/snuba/backend.py @@ -684,6 +684,7 @@ def get_group_tag_keys_and_top_values( keys: list[str] | None = None, value_limit: int = TOP_VALUES_DEFAULT_LIMIT, tenant_ids=None, + include_empty_values: bool | None = None, **kwargs, ): # Similar to __get_tag_key_and_top_values except we get the top values @@ -726,7 +727,29 @@ def get_group_tag_keys_and_top_values( tenant_ids=tenant_ids, ) - # Then supplement the key objects with the top values for each. + if include_empty_values: + keys_to_check = list(values_by_key.keys()) or [k.key for k in keys_with_counts] + empty_stats_map = self.__get_empty_value_stats_map( + dataset=dataset, + filters=filters, + conditions=conditions, + keys_to_check=keys_to_check, + tenant_ids=tenant_ids, + start=kwargs.get("start"), + end=kwargs.get("end"), + ) + for k, stats in empty_stats_map.items(): + if not stats or stats.get("count", 0) <= 0: + continue + vals = values_by_key.get(k, {}) + vals[""] = stats + # Re-sort after adding empty count and trim to value_limit + sorted_items = sorted(vals.items(), key=lambda kv: (-kv[1]["count"], str(kv[0])))[ + :value_limit + ] + values_by_key[k] = {vk: vd for vk, vd in sorted_items} + + # Then supplement the key objects with the top values for each, trimming to value_limit for keyobj in keys_with_counts: key = keyobj.key values = values_by_key.get(key, dict()) @@ -744,6 +767,61 @@ def get_group_tag_keys_and_top_values( return keys_with_counts + def __get_empty_value_stats_map( + self, + dataset: Dataset, + filters: dict[str, list[Any]], + conditions: list, + keys_to_check: list[str], + tenant_ids: dict[str, int | str] | None, + start, + end, + ) -> dict[str, dict[str, Any]]: + if not keys_to_check: + return {} + + empty_alias_map: dict[str, dict[str, str]] = {} + selected_columns_empty: list[list] = [] + for i, k in enumerate(keys_to_check): + cnt_alias = f"empty_cnt_{i}" + min_alias = f"empty_min_{i}" + max_alias = f"empty_max_{i}" + empty_alias_map[k] = {"count": cnt_alias, "first": min_alias, "last": max_alias} + tag_expr = self.format_string.format(k) + empty_predicate = ["equals", [tag_expr, ""]] + selected_columns_empty.append(["countIf", [empty_predicate], cnt_alias]) + selected_columns_empty.append(["minIf", [SEEN_COLUMN, empty_predicate], min_alias]) + selected_columns_empty.append(["maxIf", [SEEN_COLUMN, empty_predicate], max_alias]) + + empty_filters = dict(filters) + if self.key_column in empty_filters: + # For empty-value stats, do not restrict by tags_key; events that + # store an empty value may omit the key entirely. Removing this + # filter ensures those events are counted. + del empty_filters[self.key_column] + + empty_results = snuba.query( + dataset=dataset, + start=start, + end=end, + groupby=None, + conditions=conditions, + filter_keys=empty_filters, + aggregations=[], + selected_columns=selected_columns_empty, + referrer="tagstore._get_tag_keys_and_top_values_empty_counts", + tenant_ids=tenant_ids, + ) + stats_map: dict[str, dict[str, Any]] = {} + for k in keys_to_check: + aliases = empty_alias_map[k] + stats_map[k] = { + "count": empty_results.get(aliases["count"], 0) or 0, + "first_seen": empty_results.get(aliases["first"]), + "last_seen": empty_results.get(aliases["last"]), + } + return stats_map + def get_release_tags(self, organization_id, project_ids, environment_id, versions): filters = {"project_id": project_ids} if environment_id: diff --git a/tests/sentry/issues/endpoints/test_group_tags.py b/tests/sentry/issues/endpoints/test_group_tags.py index 979fc3c1942745..45cda7e60053a0 100644 --- a/tests/sentry/issues/endpoints/test_group_tags.py +++ b/tests/sentry/issues/endpoints/test_group_tags.py @@ -304,6 +304,80 @@ def test_device_class(self) -> None: assert top_values[1]["value"] == "low" assert top_values[2]["value"] == "medium" + def test_include_empty_values_for_specific_key_with_feature(self) -> None: + event = self.store_event( + data={ + "fingerprint": ["group-empty-all"], + "tags": {}, + "timestamp": before_now(minutes=1).isoformat(), + }, + project_id=self.project.id, + assert_no_errors=False, + ) + self.store_event( + data={ + "fingerprint": ["group-empty-all"], + "tags": {"foo": "bar"}, + "timestamp": before_now(minutes=1).isoformat(), + }, + project_id=self.project.id, + ) + + self.login_as(user=self.user) + + url = f"/api/0/issues/{event.group.id}/tags/?key=foo" + + # Off by default: empty should be excluded from topValues + response = self.client.get(url, format="json") + assert response.status_code == 200 + assert len(response.data) == 1 + values = {v["value"] for v in response.data[0]["topValues"]} + assert values == {"bar"} + + # With feature: empty should be included + with self.feature({"organizations:issue-tags-include-empty-values": True}): + response = self.client.get(url, format="json") + assert response.status_code == 200 + values = {v["value"] for v in response.data[0]["topValues"]} + assert values == {"", "bar"} + + def test_include_empty_values_in_all_tags_with_feature(self) -> None: + event = self.store_event( + data={ + "fingerprint": ["group-empty-all-tags"], + "tags": {"foo": ""}, + "timestamp": before_now(minutes=1).isoformat(), + }, + project_id=self.project.id, + assert_no_errors=False, + ) + self.store_event( + data={ + "fingerprint": ["group-empty-all-tags"], + "tags": {"foo": "bar"}, + "timestamp": before_now(minutes=1).isoformat(), + }, + project_id=self.project.id, + ) + + self.login_as(user=self.user) + url = f"/api/0/issues/{event.group.id}/tags/?limit=10" + + # Off by default: empty should be excluded from the All Tags list + response = self.client.get(url, format="json") + assert response.status_code == 200 + foo = next((t for t in response.data if t["key"] == "foo"), None) + assert foo is not None + assert {v["value"] for v in foo["topValues"]} == {"bar"} + + # With feature: empty should be included in the All Tags list + with self.feature({"organizations:issue-tags-include-empty-values": True}): + response = self.client.get(url, format="json") + assert response.status_code == 200 + foo = next((t for t in response.data if t["key"] == "foo"), None) + assert foo is not None + assert {v["value"] for v in foo["topValues"]} == {"", "bar"} + def test_flags(self) -> None: event1 = self.store_event( data={ From 167108af5ddea8d20628ae2e35bf43bacb61e197 Mon Sep 17 00:00:00 2001 From: Scott Cooper Date: Thu, 23 Oct 2025 15:09:12 -0700 Subject: [PATCH 2/6] cleanup comment --- src/sentry/tagstore/snuba/backend.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sentry/tagstore/snuba/backend.py b/src/sentry/tagstore/snuba/backend.py index 02d41bbdb81298..d5080a86b6ed52 100644 --- a/src/sentry/tagstore/snuba/backend.py +++ b/src/sentry/tagstore/snuba/backend.py @@ -749,7 +749,6 @@ def get_group_tag_keys_and_top_values( ] values_by_key[k] = {vk: vd for vk, vd in sorted_items} - # Then supplement the key objects with the top values for each, trimming to value_limit for keyobj in keys_with_counts: key = keyobj.key values = values_by_key.get(key, dict()) From a43ed89f61d401e630b4087b2a24603d8a92524b Mon Sep 17 00:00:00 2001 From: Scott Cooper Date: Thu, 23 Oct 2025 15:22:05 -0700 Subject: [PATCH 3/6] =false --- src/sentry/tagstore/snuba/backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/tagstore/snuba/backend.py b/src/sentry/tagstore/snuba/backend.py index d5080a86b6ed52..53ee341c086a8f 100644 --- a/src/sentry/tagstore/snuba/backend.py +++ b/src/sentry/tagstore/snuba/backend.py @@ -684,7 +684,7 @@ def get_group_tag_keys_and_top_values( keys: list[str] | None = None, value_limit: int = TOP_VALUES_DEFAULT_LIMIT, tenant_ids=None, - include_empty_values: bool | None = None, + include_empty_values=False, **kwargs, ): # Similar to __get_tag_key_and_top_values except we get the top values From d8a8df12b9404f51a58ea3e7bc9652b3fdbaacfc Mon Sep 17 00:00:00 2001 From: Scott Cooper Date: Tue, 28 Oct 2025 10:28:06 -0700 Subject: [PATCH 4/6] cleanup --- src/sentry/tagstore/base.py | 2 +- src/sentry/tagstore/snuba/backend.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sentry/tagstore/base.py b/src/sentry/tagstore/base.py index 5d0ad4898eb53b..ed2dbdc452dab0 100644 --- a/src/sentry/tagstore/base.py +++ b/src/sentry/tagstore/base.py @@ -301,7 +301,7 @@ def get_group_tag_keys_and_top_values( keys: list[str] | None = None, value_limit=TOP_VALUES_DEFAULT_LIMIT, tenant_ids=None, - include_empty_values: bool | None = None, + include_empty_values=False, **kwargs, ): diff --git a/src/sentry/tagstore/snuba/backend.py b/src/sentry/tagstore/snuba/backend.py index 53ee341c086a8f..4bf0f32d1067b6 100644 --- a/src/sentry/tagstore/snuba/backend.py +++ b/src/sentry/tagstore/snuba/backend.py @@ -815,7 +815,7 @@ def __get_empty_value_stats_map( for k in keys_to_check: aliases = empty_alias_map[k] stats_map[k] = { - "count": empty_results.get(aliases["count"], 0) or 0, + "count": empty_results.get(aliases["count"], 0), "first_seen": empty_results.get(aliases["first"]), "last_seen": empty_results.get(aliases["last"]), } From 69076fe73260524f2a2f2e8c0775ed293442426f Mon Sep 17 00:00:00 2001 From: Scott Cooper Date: Tue, 28 Oct 2025 10:35:49 -0700 Subject: [PATCH 5/6] add another assertion --- tests/sentry/issues/endpoints/test_group_tags.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/sentry/issues/endpoints/test_group_tags.py b/tests/sentry/issues/endpoints/test_group_tags.py index 45cda7e60053a0..ff7b6bc86b8d6d 100644 --- a/tests/sentry/issues/endpoints/test_group_tags.py +++ b/tests/sentry/issues/endpoints/test_group_tags.py @@ -340,6 +340,7 @@ def test_include_empty_values_for_specific_key_with_feature(self) -> None: assert response.status_code == 200 values = {v["value"] for v in response.data[0]["topValues"]} assert values == {"", "bar"} + assert response.data[0]["topValues"][0]["count"] == 1 def test_include_empty_values_in_all_tags_with_feature(self) -> None: event = self.store_event( From e56dcc89bf7832a825d73ed3749562510d485932 Mon Sep 17 00:00:00 2001 From: Scott Cooper Date: Wed, 29 Oct 2025 11:11:27 -0700 Subject: [PATCH 6/6] fix: include actor in feature flag check, fix org param --- src/sentry/issues/endpoints/group_tags.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/sentry/issues/endpoints/group_tags.py b/src/sentry/issues/endpoints/group_tags.py index 55d52a779a8d2a..37e760a2d5050d 100644 --- a/src/sentry/issues/endpoints/group_tags.py +++ b/src/sentry/issues/endpoints/group_tags.py @@ -64,7 +64,8 @@ def get(self, request: Request, group: Group) -> Response: include_empty_values = features.has( "organizations:issue-tags-include-empty-values", - organization=group.project.organization, + group.project.organization, + actor=request.user, ) tag_keys = backend.get_group_tag_keys_and_top_values(