Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 94 additions & 30 deletions data-models/pkg/protos/health_event.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions data-models/protobufs/health_event.proto
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ message HealthEvents {
repeated HealthEvent events = 2;
}

// ProcessingStrategy defines how downstream modules should handle the event.
//
// Values:
//   - EXECUTE_REMEDIATION: normal behavior; downstream modules may update
//     cluster state.
//   - STORE_ONLY: observability-only behavior; the event should be
//     persisted/exported but should not modify cluster resources.
enum ProcessingStrategy {
// Numeric value 0.
// NOTE(review): presumably this is also the value observed when the field is
// left unset (proto3 default semantics) — confirm against this file's syntax
// declaration before relying on it.
EXECUTE_REMEDIATION = 0;
// Persist/export the event only; no modification of cluster resources.
STORE_ONLY = 1;
}

enum RecommendedAction {
NONE = 0;
COMPONENT_RESET = 2;
Expand Down Expand Up @@ -66,6 +74,7 @@ message HealthEvent {
string nodeName = 13;
BehaviourOverrides quarantineOverrides = 14;
BehaviourOverrides drainOverrides = 15;
ProcessingStrategy processingStrategy = 16;
}

message BehaviourOverrides {
Expand Down
1 change: 1 addition & 0 deletions event-exporter/pkg/transformer/cloudevents.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ func ToCloudEvent(event *pb.HealthEvent, metadata map[string]string) (*CloudEven
"entitiesImpacted": entities,
"generatedTimestamp": timestamp,
"nodeName": event.NodeName,
"processingStrategy": event.ProcessingStrategy.String(),
}

if len(event.Metadata) > 0 {
Expand Down
4 changes: 4 additions & 0 deletions event-exporter/pkg/transformer/cloudevents_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ func TestToCloudEvent(t *testing.T) {
Force: false,
Skip: true,
},
ProcessingStrategy: pb.ProcessingStrategy_STORE_ONLY,
},
metadata: map[string]string{
"cluster": "prod-cluster-1",
Expand Down Expand Up @@ -102,6 +103,9 @@ func TestToCloudEvent(t *testing.T) {
if healthEvent["recommendedAction"] != "RESTART_VM" {
t.Errorf("recommendedAction = %v, want %v", healthEvent["recommendedAction"], "RESTART_VM")
}
if healthEvent["processingStrategy"] != "STORE_ONLY" {
t.Errorf("processingStrategy = %v, want STORE_ONLY", healthEvent["processingStrategy"])
}

entities := healthEvent["entitiesImpacted"].([]map[string]any)
if len(entities) != 2 {
Expand Down
1 change: 1 addition & 0 deletions fault-quarantine/pkg/evaluator/rule_evaluator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ func TestRoundTrip(t *testing.T) {
"nanos": float64(eventTime.GetNanos()),
},
"nodeName": "test-node",
"processingStrategy": float64(0),
"quarantineOverrides": nil,
"drainOverrides": nil,
}
Expand Down
2 changes: 1 addition & 1 deletion fault-quarantine/pkg/initializer/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ func InitializeAll(ctx context.Context, params InitializationParams) (*Component
}

builder := client.GetPipelineBuilder()
pipeline := builder.BuildAllHealthEventInsertsPipeline()
pipeline := builder.BuildProcessableHealthEventInsertsPipeline()

var tomlCfg config.TomlConfig
if err := configmanager.LoadTOMLConfig(params.TomlConfigPath, &tomlCfg); err != nil {
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union

DESCRIPTOR: _descriptor.FileDescriptor

class ProcessingStrategy(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
__slots__ = ()
EXECUTE_REMEDIATION: _ClassVar[ProcessingStrategy]
STORE_ONLY: _ClassVar[ProcessingStrategy]

class RecommendedAction(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
__slots__ = ()
NONE: _ClassVar[RecommendedAction]
Expand All @@ -23,6 +28,8 @@ class RecommendedAction(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
RUN_DCGMEUD: _ClassVar[RecommendedAction]
UNKNOWN: _ClassVar[RecommendedAction]

EXECUTE_REMEDIATION: ProcessingStrategy
STORE_ONLY: ProcessingStrategy
NONE: RecommendedAction
COMPONENT_RESET: RecommendedAction
CONTACT_SUPPORT: RecommendedAction
Expand Down Expand Up @@ -68,6 +75,7 @@ class HealthEvent(_message.Message):
"nodeName",
"quarantineOverrides",
"drainOverrides",
"processingStrategy",
)

class MetadataEntry(_message.Message):
Expand All @@ -93,6 +101,7 @@ class HealthEvent(_message.Message):
NODENAME_FIELD_NUMBER: _ClassVar[int]
QUARANTINEOVERRIDES_FIELD_NUMBER: _ClassVar[int]
DRAINOVERRIDES_FIELD_NUMBER: _ClassVar[int]
PROCESSINGSTRATEGY_FIELD_NUMBER: _ClassVar[int]
version: int
agent: str
componentClass: str
Expand All @@ -108,6 +117,7 @@ class HealthEvent(_message.Message):
nodeName: str
quarantineOverrides: BehaviourOverrides
drainOverrides: BehaviourOverrides
processingStrategy: ProcessingStrategy
def __init__(
self,
version: _Optional[int] = ...,
Expand All @@ -125,6 +135,7 @@ class HealthEvent(_message.Message):
nodeName: _Optional[str] = ...,
quarantineOverrides: _Optional[_Union[BehaviourOverrides, _Mapping]] = ...,
drainOverrides: _Optional[_Union[BehaviourOverrides, _Mapping]] = ...,
processingStrategy: _Optional[_Union[ProcessingStrategy, str]] = ...,
) -> None: ...

class BehaviourOverrides(_message.Message):
Expand Down
Loading
Loading