Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/sentry/features/temporary.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,8 @@ def register_temporary_features(manager: FeatureManager) -> None:
manager.add("organizations:issue-detection-sort-spans", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
# Whether to allow issue only search on the issue list
manager.add("organizations:issue-search-allow-postgres-only-search", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
# Include empty tag count in issue tag totals/values
manager.add("organizations:issue-tags-include-empty-values", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you mentioned you'll need this in the frontend, will you want api_expose=True?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think in the frontend i can just handle empty strings and let the backend decide when to send them the value is "" which doesn't exist today.

# Enable the new issue category mapping
manager.add("organizations:issue-taxonomy", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
manager.add("organizations:metric-issue-poc", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
Expand Down
8 changes: 7 additions & 1 deletion src/sentry/issues/endpoints/group_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from rest_framework.request import Request
from rest_framework.response import Response

from sentry import tagstore
from sentry import features, tagstore
from sentry.api.api_publish_status import ApiPublishStatus
from sentry.api.base import region_silo_endpoint
from sentry.api.helpers.environments import get_environments
Expand Down Expand Up @@ -62,12 +62,18 @@ def get(self, request: Request, group: Group) -> Response:

environment_ids = [e.id for e in get_environments(request, group.project.organization)]

include_empty_values = features.has(
"organizations:issue-tags-include-empty-values",
organization=group.project.organization,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if we're doing this on group.project.org, would making this a project option be better?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i just need it behind a flag, org is usually the easier one to roll out

)

tag_keys = backend.get_group_tag_keys_and_top_values(
group,
environment_ids,
keys=keys,
value_limit=value_limit,
tenant_ids={"organization_id": group.project.organization_id},
include_empty_values=include_empty_values,
)

data = serialize(tag_keys, request.user)
Expand Down
1 change: 1 addition & 0 deletions src/sentry/tagstore/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,7 @@ def get_group_tag_keys_and_top_values(
keys: list[str] | None = None,
value_limit=TOP_VALUES_DEFAULT_LIMIT,
tenant_ids=None,
include_empty_values: bool | None = None,
**kwargs,
):

Expand Down
79 changes: 78 additions & 1 deletion src/sentry/tagstore/snuba/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,6 +684,7 @@ def get_group_tag_keys_and_top_values(
keys: list[str] | None = None,
value_limit: int = TOP_VALUES_DEFAULT_LIMIT,
tenant_ids=None,
include_empty_values=False,
**kwargs,
):
# Similar to __get_tag_key_and_top_values except we get the top values
Expand Down Expand Up @@ -726,7 +727,28 @@ def get_group_tag_keys_and_top_values(
tenant_ids=tenant_ids,
)

# Then supplement the key objects with the top values for each.
if include_empty_values:
keys_to_check = list(values_by_key.keys()) or [k.key for k in keys_with_counts]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Tag Key Logic Fails with Empty Values

The keys_to_check logic in get_group_tag_keys_and_top_values incorrectly determines which keys to check for empty values. The expression list(values_by_key.keys()) or [k.key for k in keys_with_counts] can omit tags that only have empty values, especially when other tags have non-empty values or when specific keys are requested. This leads to incomplete tag data.

Fix in Cursor Fix in Web

empty_stats_map = self.__get_empty_value_stats_map(
dataset=dataset,
filters=filters,
conditions=conditions,
keys_to_check=keys_to_check,
tenant_ids=tenant_ids,
start=kwargs.get("start"),
end=kwargs.get("end"),
)
for k, stats in empty_stats_map.items():
if not stats or stats.get("count", 0) <= 0:
continue
vals = values_by_key.get(k, {})
Comment on lines +742 to +744
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: from my understanding think these need to be .get(), since empty_stats_map is either {} (so the iteration won't run) or are guarenteed to have k (same with above for count)?

vals[""] = stats
# Re-sort after adding empty count and trim to value_limit
sorted_items = sorted(vals.items(), key=lambda kv: (-kv[1]["count"], str(kv[0])))[
:value_limit
]
values_by_key[k] = {vk: vd for vk, vd in sorted_items}

for keyobj in keys_with_counts:
key = keyobj.key
values = values_by_key.get(key, dict())
Expand All @@ -744,6 +766,61 @@ def get_group_tag_keys_and_top_values(

return keys_with_counts

def __get_empty_value_stats_map(
self,
dataset: Dataset,
filters: dict[str, list[Any]],
conditions: list,
keys_to_check: list[str],
tenant_ids: dict[str, int | str] | None,
start,
end,
) -> dict[str, dict[str, Any]]:
if not keys_to_check:
return {}

empty_alias_map: dict[str, dict[str, str]] = {}
selected_columns_empty: list[list] = []
for i, k in enumerate(keys_to_check):
cnt_alias = f"empty_cnt_{i}"
min_alias = f"empty_min_{i}"
max_alias = f"empty_max_{i}"
empty_alias_map[k] = {"count": cnt_alias, "first": min_alias, "last": max_alias}
tag_expr = self.format_string.format(k)
empty_predicate = ["equals", [tag_expr, ""]]
selected_columns_empty.append(["countIf", [empty_predicate], cnt_alias])
selected_columns_empty.append(["minIf", [SEEN_COLUMN, empty_predicate], min_alias])
selected_columns_empty.append(["maxIf", [SEEN_COLUMN, empty_predicate], max_alias])

empty_filters = dict(filters)
if self.key_column in empty_filters:
# For empty-value stats, do not restrict by tags_key; events that
# store an empty value may omit the key entirely. Removing this
# filter ensures those events are counted.
del empty_filters[self.key_column]

empty_results = snuba.query(
dataset=dataset,
start=start,
end=end,
groupby=None,
conditions=conditions,
filter_keys=empty_filters,
aggregations=[],
selected_columns=selected_columns_empty,
referrer="tagstore._get_tag_keys_and_top_values_empty_counts",
tenant_ids=tenant_ids,
)
Comment on lines +802 to +813
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we going to end up making an extra query for every org to fetch these empty keys?

stats_map: dict[str, dict[str, Any]] = {}
for k in keys_to_check:
aliases = empty_alias_map[k]
stats_map[k] = {
"count": empty_results.get(aliases["count"], 0) or 0,
"first_seen": empty_results.get(aliases["first"]),
"last_seen": empty_results.get(aliases["last"]),
}
return stats_map

def get_release_tags(self, organization_id, project_ids, environment_id, versions):
filters = {"project_id": project_ids}
if environment_id:
Expand Down
74 changes: 74 additions & 0 deletions tests/sentry/issues/endpoints/test_group_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,80 @@ def test_device_class(self) -> None:
assert top_values[1]["value"] == "low"
assert top_values[2]["value"] == "medium"

def test_include_empty_values_for_specific_key_with_feature(self) -> None:
event = self.store_event(
data={
"fingerprint": ["group-empty-all"],
"tags": {},
"timestamp": before_now(minutes=1).isoformat(),
},
project_id=self.project.id,
assert_no_errors=False,
)
self.store_event(
data={
"fingerprint": ["group-empty-all"],
"tags": {"foo": "bar"},
"timestamp": before_now(minutes=1).isoformat(),
},
project_id=self.project.id,
)

self.login_as(user=self.user)

url = f"/api/0/issues/{event.group.id}/tags/?key=foo"

# Off by default: empty should be excluded from topValues
response = self.client.get(url, format="json")
assert response.status_code == 200
assert len(response.data) == 1
values = {v["value"] for v in response.data[0]["topValues"]}
assert values == {"bar"}

# With feature: empty should be included
with self.feature({"organizations:issue-tags-include-empty-values": True}):
response = self.client.get(url, format="json")
assert response.status_code == 200
values = {v["value"] for v in response.data[0]["topValues"]}
assert values == {"", "bar"}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we also check that the empty tag count is correct (so response.data[0]["topValues"]["count"] is 1 or whatever)


def test_include_empty_values_in_all_tags_with_feature(self) -> None:
event = self.store_event(
data={
"fingerprint": ["group-empty-all-tags"],
"tags": {"foo": ""},
"timestamp": before_now(minutes=1).isoformat(),
},
project_id=self.project.id,
assert_no_errors=False,
)
self.store_event(
data={
"fingerprint": ["group-empty-all-tags"],
"tags": {"foo": "bar"},
"timestamp": before_now(minutes=1).isoformat(),
},
project_id=self.project.id,
)

self.login_as(user=self.user)
url = f"/api/0/issues/{event.group.id}/tags/?limit=10"

# Off by default: empty should be excluded from the All Tags list
response = self.client.get(url, format="json")
assert response.status_code == 200
foo = next((t for t in response.data if t["key"] == "foo"), None)
assert foo is not None
assert {v["value"] for v in foo["topValues"]} == {"bar"}

# With feature: empty should be included in the All Tags list
with self.feature({"organizations:issue-tags-include-empty-values": True}):
response = self.client.get(url, format="json")
assert response.status_code == 200
foo = next((t for t in response.data if t["key"] == "foo"), None)
assert foo is not None
assert {v["value"] for v in foo["topValues"]} == {"", "bar"}

def test_flags(self) -> None:
event1 = self.store_event(
data={
Expand Down
Loading