Skip to content

Commit 7c1ae5f

Browse files
authored
feat: FIT-710: Enable FSM state fields in serializers and models (#8775)
Co-authored-by: bmartel <[email protected]>
1 parent c1b6cb6 commit 7c1ae5f

File tree

7 files changed

+484
-13
lines changed

7 files changed

+484
-13
lines changed

label_studio/data_manager/managers.py

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
)
3131
from django.db.models.fields.json import KeyTextTransform
3232
from django.db.models.functions import Cast, Coalesce, Concat
33+
from fsm.queryset_mixins import FSMStateQuerySetMixin
3334
from pydantic import BaseModel
3435

3536
from label_studio.core.utils.common import load_func
@@ -488,7 +489,16 @@ def apply_filters(queryset, filters, project, request):
488489
return queryset
489490

490491

491-
class TaskQuerySet(models.QuerySet):
492+
class TaskQuerySet(FSMStateQuerySetMixin, models.QuerySet):
493+
"""
494+
QuerySet for Task model with FSM state annotation support.
495+
496+
Extends Django's QuerySet with:
497+
- FSM state annotation (via FSMStateQuerySetMixin)
498+
- Data Manager filters and ordering
499+
- Selected items handling
500+
"""
501+
492502
def prepared(self, prepare_params=None):
493503
"""Apply filters, ordering and selected items to queryset
494504
@@ -700,6 +710,29 @@ def dummy(queryset):
700710
return queryset
701711

702712

713+
def annotate_state(queryset):
    """
    Expose each row's FSM state under the Data Manager column name 'state'.

    The work is delegated to ``queryset.annotate_fsm_state()``, which is
    expected to add a 'current_state' annotation when FSM state fields are
    available. When that annotation is present it is mirrored as 'state';
    otherwise the queryset returned by ``annotate_fsm_state()`` is handed
    back without a 'state' alias.

    :param queryset: queryset providing ``annotate_fsm_state()``
    :return: queryset, annotated with 'state' when 'current_state' exists
    """
    annotated = queryset.annotate_fsm_state()

    # Nothing to alias when 'current_state' was not added to the query
    if 'current_state' not in annotated.query.annotations:
        return annotated

    # Data Manager addresses this column as 'state', so mirror 'current_state'
    return annotated.annotate(state=F('current_state'))
734+
735+
703736
settings.DATA_MANAGER_ANNOTATIONS_MAP = {
704737
'avg_lead_time': annotate_avg_lead_time,
705738
'completed_at': annotate_completed_at,
@@ -712,6 +745,7 @@ def dummy(queryset):
712745
'file_upload': file_upload,
713746
'draft_exists': annotate_draft_exists,
714747
'storage_filename': annotate_storage_filename,
748+
'state': annotate_state,
715749
}
716750

717751

@@ -724,6 +758,19 @@ def update_annotation_map(obj):
724758

725759

726760
class PreparedTaskManager(models.Manager):
761+
"""
762+
Manager for Task model with Data Manager annotations.
763+
764+
Provides:
765+
- Advanced query annotations for Data Manager
766+
- Filter and ordering support
767+
- FSM state annotation support (via TaskQuerySet)
768+
769+
Note: Overrides the base get_queryset() to return TaskQuerySet. Also has
770+
a custom get_queryset(fields_for_evaluation, prepare_params, ...) method
771+
for Data Manager-specific functionality.
772+
"""
773+
727774
@staticmethod
728775
def annotate_queryset(
729776
queryset, fields_for_evaluation=None, all_fields=False, excluded_fields_for_evaluation=None, request=None
@@ -754,13 +801,23 @@ def get_queryset(
754801
self, fields_for_evaluation=None, prepare_params=None, all_fields=False, excluded_fields_for_evaluation=None
755802
):
756803
"""
804+
Get queryset with optional Data Manager annotations and filters.
805+
806+
When called without parameters (Django internal use), returns TaskQuerySet.
807+
When called with parameters (Data Manager use), returns annotated and filtered queryset.
808+
757809
:param fields_for_evaluation: list of annotated fields in task
758810
:param prepare_params: filters, ordering, selected items
759811
:param all_fields: evaluate all fields for task
760812
:param excluded_fields_for_evaluation: list of fields to exclude even when all_fields=True
761813
:param request: request for user extraction
762814
:return: task queryset with annotated fields
763815
"""
816+
# If called without parameters, return base TaskQuerySet (for Django internal use)
817+
if prepare_params is None:
818+
return TaskQuerySet(self.model, using=self._db)
819+
820+
# Otherwise, use Data Manager filtering and annotation
764821
queryset = self.only_filtered(prepare_params=prepare_params)
765822
# Expose view data to annotation functions for column-specific configuration
766823
queryset.view_data = getattr(prepare_params, 'data', None)
@@ -781,5 +838,24 @@ def only_filtered(self, prepare_params=None):
781838

782839

783840
class TaskManager(models.Manager):
    """
    Default manager for the Task model, built on top of TaskQuerySet.

    Every queryset produced here is a TaskQuerySet, so the helpers that
    FSMStateQuerySetMixin contributes (such as ``annotate_fsm_state()``)
    are available on it.
    """

    def get_queryset(self):
        """Build a TaskQuerySet bound to this manager's model and database alias."""
        return TaskQuerySet(self.model, using=self._db)

    def for_user(self, user):
        """Return the tasks whose project belongs to the user's active organization."""
        tasks = self.get_queryset()
        return tasks.filter(project__organization=user.active_organization)

    def with_state(self):
        """Return a TaskQuerySet with ``annotate_fsm_state()`` applied."""
        tasks = self.get_queryset()
        return tasks.annotate_fsm_state()

label_studio/data_manager/serializers.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,13 @@
33
import os
44

55
import ujson as json
6+
from core.current_request import CurrentContext
7+
from core.feature_flags import flag_set
68
from data_manager.models import Filter, FilterGroup, View
79
from django.conf import settings
810
from django.db import transaction
911
from drf_spectacular.utils import extend_schema_field
12+
from fsm.serializer_fields import FSMStateField
1013
from projects.models import Project
1114
from rest_framework import serializers
1215
from tasks.models import Task
@@ -434,6 +437,8 @@ class PredictionsDMFieldSerializer(serializers.SerializerMethodField):
434437

435438

436439
class DataManagerTaskSerializer(TaskSerializer):
440+
"""Data Manager Task Serializer with FSM state support."""
441+
437442
predictions = PredictionsDMFieldSerializer(required=False, read_only=True)
438443
annotations = AnnotationsDMFieldSerializer(required=False, many=True, default=[], read_only=True)
439444
drafts = AnnotationDraftDMFieldSerializer(required=False, read_only=True)
@@ -454,6 +459,7 @@ class DataManagerTaskSerializer(TaskSerializer):
454459
avg_lead_time = serializers.FloatField(required=False)
455460
draft_exists = serializers.BooleanField(required=False)
456461
updated_by = UpdatedByDMFieldSerializer(required=False, read_only=True)
462+
state = FSMStateField(read_only=True) # FSM state - automatically uses annotation if present
457463

458464
CHAR_LIMITS = 500
459465

@@ -470,6 +476,13 @@ def to_representation(self, obj):
470476
ret.pop('annotations', None)
471477
if not self.context.get('predictions'):
472478
ret.pop('predictions', None)
479+
# Remove state field if feature flags are disabled
480+
user = CurrentContext.get_user()
481+
if not (
482+
flag_set('fflag_feat_fit_568_finite_state_management', user=user)
483+
and flag_set('fflag_feat_fit_710_fsm_state_fields', user=user)
484+
):
485+
ret.pop('state', None)
473486
return ret
474487

475488
def _pretty_results(self, task, field, unique=False):
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
"""
2+
FSM QuerySet Mixins for annotating entities with their current state.
3+
4+
Provides reusable Django QuerySet mixins that efficiently annotate entities
5+
with their current FSM state using optimized subqueries to prevent N+1 queries.
6+
7+
Usage:
8+
class TaskQuerySet(FSMStateQuerySetMixin, models.QuerySet):
9+
pass
10+
11+
class TaskManager(models.Manager):
12+
def get_queryset(self):
13+
return TaskQuerySet(self.model, using=self._db).annotate_fsm_state()
14+
15+
Note:
16+
All state annotation functionality is guarded by TWO feature flags:
17+
1. 'fflag_feat_fit_568_finite_state_management' - Controls FSM background calculations
18+
2. 'fflag_feat_fit_710_fsm_state_fields' - Controls state field display in APIs
19+
20+
When either flag is disabled, no annotation is performed and the queryset is returned unchanged.
21+
"""
22+
23+
import logging
24+
25+
from core.current_request import CurrentContext
26+
from core.feature_flags import flag_set
27+
from django.db.models import OuterRef, Subquery
28+
from fsm.registry import get_state_model
29+
30+
logger = logging.getLogger(__name__)
31+
32+
33+
class FSMStateQuerySetMixin:
    """
    Mixin for Django QuerySets that annotates rows with their current FSM state.

    Provides ``annotate_fsm_state()``, which adds a ``current_state``
    annotation computed by a correlated subquery against the state model
    registered for the queryset's model.

    This approach:
    - Avoids N+1 queries: the state is fetched as part of the main query
      instead of one extra query per object
    - Yields ``None`` for entities that have no state rows
    - Picks the latest state by ordering on ``-id`` (this assumes state ids
      are time-ordered, e.g. UUID7 - confirm against the state models)
    - Applies to any model for which ``get_state_model()`` returns a state model

    Example:
        # In your model manager
        class TaskManager(models.Manager):
            def get_queryset(self):
                return TaskQuerySet(self.model, using=self._db)

            def with_state(self):
                return self.get_queryset().annotate_fsm_state()

        # Usage
        tasks = Task.objects.with_state().filter(project=project)
        for task in tasks:
            print(f"Task {task.id}: {task.current_state}")  # No additional queries
    """

    def annotate_fsm_state(self):
        """
        Annotate the queryset with the current FSM state as ``current_state``.

        Returns:
            QuerySet: the queryset annotated with ``current_state``, or this
            queryset unchanged when annotation is skipped.

        Note:
            - Annotation is skipped unless BOTH feature flags
              'fflag_feat_fit_568_finite_state_management' and
              'fflag_feat_fit_710_fsm_state_fields' are set for the current user
            - Annotation is skipped when no state model is registered for
              the queryset's model
            - If no state row exists for an entity, ``current_state`` is None
        """
        # flag_set is called directly (rather than a wrapper helper) so that the
        # check behaves the same in core and enterprise contexts
        user = CurrentContext.get_user()
        if not (
            flag_set('fflag_feat_fit_568_finite_state_management', user=user)
            and flag_set('fflag_feat_fit_710_fsm_state_fields', user=user)
        ):
            logger.debug('FSM feature flag disabled, skipping state annotation')
            return self

        # The registry is keyed by the lowercase model name, e.g. 'task'
        entity_name = self.model._meta.model_name
        state_model = get_state_model(entity_name)

        if not state_model:
            # Lazy %-formatting: the message is only rendered if DEBUG is enabled
            logger.debug('No state model registered for %s, skipping annotation', entity_name)
            return self

        # Foreign key column on the state model, e.g. 'task_id' for a task state model
        entity_field_name = state_model._get_entity_field_name()
        fk_field = f'{entity_field_name}_id'

        # Correlated scalar subquery: for each outer row, take the 'state' value of
        # the state row with the highest id. Only the 'state' column is selected,
        # and the [:1] slice keeps the subquery to a single value per outer row.
        current_state_subquery = Subquery(
            state_model.objects.filter(**{fk_field: OuterRef('pk')}).order_by('-id').values('state')[:1]
        )

        return self.annotate(current_state=current_state_subquery)

0 commit comments

Comments
 (0)