-
Notifications
You must be signed in to change notification settings - Fork 67
Expand file tree
/
Copy pathMakefile
More file actions
919 lines (815 loc) · 33.1 KB
/
Makefile
File metadata and controls
919 lines (815 loc) · 33.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
# Main Makefile for nvsentinel project
# Coordinates between multiple sub-Makefiles organized by functionality
# Go binary and tools
GO := go
GOLANGCI_LINT := golangci-lint
GOTESTSUM := gotestsum
GOCOVER_COBERTURA := gocover-cobertura
ENVTEST := setup-envtest
# Variables
GOPATH ?= $(shell go env GOPATH)
GO_CACHE_DIR ?= $(shell go env GOCACHE)
# Load tool versions from .versions.yaml (single source of truth)
# Requires yq to be installed: brew install yq (macOS) or see https://github.com/mikefarah/yq
YQ := $(shell command -v yq 2> /dev/null)
ifndef YQ
$(error yq is required but not found. Install it with: brew install yq (macOS) or see https://github.com/mikefarah/yq)
endif
# Load versions from .versions.yaml
ADDLICENSE_VERSION := $(shell $(YQ) '.linting.addlicense' .versions.yaml)
BLACK_VERSION := $(shell $(YQ) '.linting.black' .versions.yaml)
DOCKER_BUILDX_VERSION := $(shell $(YQ) '.container_tools.docker_buildx' .versions.yaml)
GO_VERSION := $(shell $(YQ) '.languages.go' .versions.yaml)
GOCOVER_COBERTURA_VERSION := $(shell $(YQ) '.go_tools.gocover_cobertura' .versions.yaml)
GOLANGCI_LINT_VERSION := $(shell $(YQ) '.go_tools.golangci_lint' .versions.yaml)
GOTESTSUM_VERSION := $(shell $(YQ) '.go_tools.gotestsum' .versions.yaml)
GRPCIO_TOOLS_VERSION := $(shell $(YQ) '.protobuf.grpcio_tools' .versions.yaml)
KO_VERSION := $(shell $(YQ) '.container_tools.ko' .versions.yaml)
POETRY_VERSION := $(shell $(YQ) '.build_tools.poetry' .versions.yaml)
PROTOBUF_VERSION := $(shell $(YQ) '.protobuf.protobuf' .versions.yaml)
PROTOC_GEN_GO_GRPC_VERSION := $(shell $(YQ) '.protobuf.protoc_gen_go_grpc' .versions.yaml)
PROTOC_GEN_GO_VERSION := $(shell $(YQ) '.protobuf.protoc_gen_go' .versions.yaml)
PYTHON_VERSION := $(shell $(YQ) '.languages.python' .versions.yaml)
SHELLCHECK_VERSION := $(shell $(YQ) '.linting.shellcheck' .versions.yaml)
# Go modules with specific patterns from CI
GO_MODULES := \
api \
health-monitors/syslog-health-monitor \
health-monitors/csp-health-monitor \
health-monitors/kubernetes-object-monitor \
health-monitors/nic-health-monitor \
platform-connectors \
health-events-analyzer \
fault-quarantine \
labeler \
node-drainer \
fault-remediation \
janitor \
metadata-collector \
event-exporter \
store-client \
commons
# Python modules
PYTHON_MODULES := \
health-monitors/gpu-health-monitor
# Container-only modules
CONTAINER_MODULES := \
log-collector \
gpu-reset
# Special modules requiring private repo access
PRIVATE_MODULES := \
health-monitors/csp-health-monitor \
health-events-analyzer \
fault-quarantine \
labeler \
node-drainer \
fault-remediation \
janitor
# Modules requiring kubebuilder for tests
KUBEBUILDER_MODULES := \
node-drainer \
fault-remediation
# Default target
.PHONY: all
all: lint-test-all ## Run lint-test-all (default target)
# Show loaded tool versions
.PHONY: show-versions
show-versions: ## Display all tool versions loaded from .versions.yaml
ifndef YQ
@echo "⚠️ ERROR: yq is required to display versions"
@echo " Install yq for version management:"
@echo " macOS: brew install yq"
@echo " Linux: See https://github.com/mikefarah/yq"
@exit 1
else
@echo "=== Tool Versions (from .versions.yaml) ==="
@echo ""
@$(YQ) eval '.languages | to_entries | .[] | " " + .key + ": " + .value' .versions.yaml | \
(echo "Languages:" && cat)
@echo ""
@$(YQ) eval '.build_tools | to_entries | .[] | " " + .key + ": " + .value' .versions.yaml | \
(echo "Build Tools:" && cat)
@echo ""
@$(YQ) eval '.go_tools | to_entries | .[] | " " + .key + ": " + .value' .versions.yaml | \
(echo "Go Tools:" && cat)
@echo ""
@$(YQ) eval '.protobuf | to_entries | .[] | " " + .key + ": " + .value' .versions.yaml | \
(echo "Protocol Buffers:" && cat)
@echo ""
@$(YQ) eval '.linting | to_entries | .[] | " " + .key + ": " + .value' .versions.yaml | \
(echo "Linting:" && cat)
@echo ""
@$(YQ) eval '.container_tools | to_entries | .[] | " " + .key + ": " + .value' .versions.yaml | \
(echo "Container Tools:" && cat)
@echo ""
@$(YQ) eval '.testing_tools | to_entries | .[] | " " + .key + ": " + .value' .versions.yaml | \
(echo "Testing & E2E Tools:" && cat)
@echo ""
@echo "==========================================="
endif
# Setup development environment
.PHONY: dev-env-setup
dev-env-setup: ## Setup complete development environment (installs all required tools). Use AUTO_MODE=true to skip prompts
@echo "Setting up NVSentinel development environment..."
@AUTO_MODE=$(AUTO_MODE) bash scripts/setup-dev-env.sh
# Install lint tools
.PHONY: install-lint-tools
install-lint-tools: install-golangci-lint install-gotestsum install-gocover-cobertura ## Install all lint tools (golangci-lint, gotestsum, gocover-cobertura)
@echo "All lint tools installed successfully"
@echo ""
@echo "=== Installed Tool Versions and Locations ==="
@echo "Go: $$(go version)"
@echo " Location: $$(which go)"
@if command -v golangci-lint >/dev/null 2>&1; then \
echo "golangci-lint: $$(golangci-lint version 2>/dev/null | head -1)"; \
echo " Location: $$(which golangci-lint)"; \
else \
echo "golangci-lint: not found"; \
fi
@if command -v gotestsum >/dev/null 2>&1; then \
echo "gotestsum: $$(gotestsum --version 2>/dev/null || echo 'version command not available')"; \
echo " Location: $$(which gotestsum)"; \
else \
echo "gotestsum: not found"; \
fi
@if command -v gocover-cobertura >/dev/null 2>&1; then \
echo "gocover-cobertura: installed (no version command available)"; \
echo " Location: $$(which gocover-cobertura)"; \
else \
echo "gocover-cobertura: not found"; \
fi
@echo "=============================================="
# Install golangci-lint
.PHONY: install-golangci-lint
install-golangci-lint:
@echo "Installing golangci-lint $(GOLANGCI_LINT_VERSION)..."
@if command -v golangci-lint >/dev/null 2>&1; then \
current_version=$$(golangci-lint version 2>/dev/null | grep -o 'v[0-9]\+\.[0-9]\+\.[0-9]\+' || echo "unknown"); \
if [ "$$current_version" = "$(GOLANGCI_LINT_VERSION)" ]; then \
echo "golangci-lint $(GOLANGCI_LINT_VERSION) is already installed at $$(which golangci-lint)"; \
else \
existing_path=$$(which golangci-lint); \
install_dir=$$(dirname "$$existing_path"); \
echo "Current version: $$current_version at $$existing_path, installing $(GOLANGCI_LINT_VERSION) to $$install_dir..."; \
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b "$$install_dir" $(GOLANGCI_LINT_VERSION); \
fi; \
else \
echo "golangci-lint not found, installing $(GOLANGCI_LINT_VERSION) to $(GOPATH)/bin..."; \
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(GOPATH)/bin $(GOLANGCI_LINT_VERSION); \
fi
@echo "golangci-lint installation complete"
# Install gotestsum
.PHONY: install-gotestsum
install-gotestsum:
@echo "Installing gotestsum..."
@if ! command -v gotestsum >/dev/null 2>&1; then \
echo "gotestsum not found, installing..."; \
$(GO) install gotest.tools/gotestsum@$(GOTESTSUM_VERSION); \
else \
echo "gotestsum is already installed"; \
fi
# Install gocover-cobertura
.PHONY: install-gocover-cobertura
install-gocover-cobertura:
@echo "Installing gocover-cobertura..."
@if ! command -v gocover-cobertura >/dev/null 2>&1; then \
echo "gocover-cobertura not found, installing..."; \
$(GO) install github.com/boumenot/gocover-cobertura@$(GOCOVER_COBERTURA_VERSION); \
else \
echo "gocover-cobertura is already installed"; \
fi
# Install Go $(GO_VERSION) for CI environments (Linux and macOS, amd64 and arm64)
.PHONY: install-go-ci
install-go-ci: ## Install Go $(GO_VERSION) for CI environments (Linux/macOS, amd64/arm64)
@echo "Installing Go $(GO_VERSION) for CI..."
@# Detect platform and architecture
@OS=$$(uname -s | tr '[:upper:]' '[:lower:]'); \
ARCH=$$(uname -m); \
case "$$ARCH" in \
x86_64) ARCH=amd64 ;; \
aarch64|arm64) ARCH=arm64 ;; \
*) echo "Unsupported architecture: $$ARCH" && exit 1 ;; \
esac; \
echo "Detected platform: $$OS-$$ARCH"; \
\
if command -v go >/dev/null 2>&1; then \
current_version=$$(go version | grep -o 'go[0-9]\+\.[0-9]\+\.[0-9]\+' | sed 's/go//'); \
if [ "$$current_version" = "$(GO_VERSION)" ]; then \
echo "Go $(GO_VERSION) is already installed"; \
echo "Location: $$(which go)"; \
go version; \
exit 0; \
else \
echo "Current Go version: $$current_version, installing $(GO_VERSION)..."; \
fi; \
else \
echo "Go not found, installing $(GO_VERSION)..."; \
fi; \
\
GO_TARBALL="go$(GO_VERSION).$$OS-$$ARCH.tar.gz"; \
GO_URL="https://go.dev/dl/$$GO_TARBALL"; \
echo "Downloading $$GO_URL..."; \
\
if command -v curl >/dev/null 2>&1; then \
if ! curl -fsSL "$$GO_URL" -o "$$GO_TARBALL"; then \
echo "Failed to download Go tarball from $$GO_URL"; \
exit 1; \
fi; \
elif command -v wget >/dev/null 2>&1; then \
if ! wget --show-progress "$$GO_URL"; then \
echo "Failed to download Go tarball from $$GO_URL"; \
exit 1; \
fi; \
else \
echo "Neither curl nor wget found. Please install one of them."; \
exit 1; \
fi; \
\
echo "Extracting Go $(GO_VERSION)..."; \
if [ "$$OS" = "darwin" ]; then \
rm -rf /usr/local/go 2>/dev/null || true; \
tar -C /usr/local -xzf "$$GO_TARBALL"; \
echo "Go $(GO_VERSION) installed to /usr/local/go"; \
echo "Add /usr/local/go/bin to your PATH if not already present"; \
else \
rm -rf /usr/local/go 2>/dev/null || true; \
tar -C /usr/local -xzf "$$GO_TARBALL"; \
echo "Go $(GO_VERSION) installed to /usr/local/go"; \
echo "Add /usr/local/go/bin to your PATH if not already present"; \
fi; \
\
rm -f "$$GO_TARBALL"; \
echo "Installation complete"; \
\
if [ -x /usr/local/go/bin/go ]; then \
echo "Installed Go version: $$(/usr/local/go/bin/go version)"; \
echo "Location: /usr/local/go/bin/go"; \
else \
echo "Warning: Go binary not found at expected location /usr/local/go/bin/go"; \
fi
# Lint and test all modules (delegates to sub-Makefiles)
.PHONY: lint-test-all
lint-test-all: protos-lint license-headers-lint gomod-lint health-monitors-lint-test-all go-lint-test-all python-lint-test-all kubernetes-distro-lint log-collector-lint gpu-reset-lint ## Lint and test all modules
# Health monitors lint-test (delegate to health-monitors/Makefile)
.PHONY: health-monitors-lint-test-all
health-monitors-lint-test-all:
@echo "Running lint and tests for all health monitors..."
$(MAKE) -C health-monitors lint-test-all
# Generate protobuf files
.PHONY: protos-generate
protos-generate: vendor-proto protos-clean ## Ensure vendor dir, then generate protobuf files from .proto sources
@echo "Generating protobuf files in data-models (Go) and gpu-health-monitor (Python)..."
@echo "=== Tool Versions ==="
@echo "Go: $$(go version)"
@echo "protoc: $$(protoc --version)"
@echo "protoc-gen-go: $$(protoc-gen-go --version)"
@echo "protoc-gen-go-grpc: $$(protoc-gen-go-grpc --version)"
@if command -v python3 >/dev/null 2>&1; then \
grpcio_tools_version=$$(python3 -c "import importlib.metadata; print('grpcio-tools', importlib.metadata.version('grpcio-tools'))" 2>/dev/null || echo "grpcio-tools: not installed"); \
echo "$$grpcio_tools_version"; \
fi
@if command -v black >/dev/null 2>&1; then \
black_version=$$(black --version 2>/dev/null | head -1 || echo "black: not available"); \
echo "$$black_version"; \
else \
echo "black: not found"; \
fi
@echo "========================"
# Generate Go protobuf files in data-models (shared by all Go modules)
$(MAKE) -C data-models protos-generate
# Generate Go protobuf files in api
$(MAKE) -C api protos-generate
# Generate Python protobuf files for gpu-health-monitor
$(MAKE) -C health-monitors/gpu-health-monitor protos-generate
# Generate Python protobuf files for dcgm-diag preflight check
$(MAKE) -C preflight-checks/dcgm-diag protos-generate
# Generate Python protobuf files for nccl-allreduce preflight check
$(MAKE) -C preflight-checks/nccl-allreduce protos-generate
# Check protobuf files
.PHONY: protos-lint
protos-lint: protos-generate ## Generate and check protobuf files are up to date
@echo "Checking protobuf files..."
git status --porcelain --untracked-files=no
git --no-pager diff
@echo "Checking if protobuf files are up to date..."
test -z "$$(git status --porcelain --untracked-files=no)"
# Clean generated protobuf files
.PHONY: protos-clean
protos-clean: ## Remove all generated protobuf files
@echo "Cleaning generated protobuf files..."
@echo "Removing Go protobuf files (.pb.go)..."
find . -name "*.pb.go" -type f -delete
@echo "Removing Python protobuf files (*_pb2.py, *_pb2_grpc.py, *_pb2.pyi)..."
find . \( -name "*_pb2.py" -o -name "*_pb2_grpc.py" -o -name "*_pb2.pyi" \) -type f -delete
@echo "All generated protobuf files have been removed."
# Ensure vendor directory and proto dependencies are present before generating protobufs
.PHONY: vendor-proto
vendor-proto:
$(MAKE) -C data-models vendor-proto
# Check license headers
.PHONY: license-headers-lint
license-headers-lint: ## Check license headers in source files
@echo "Checking license headers..."
addlicense -f .github/headers/LICENSE -check \
-ignore '.venv/**' \
-ignore '**/__pycache__/**' \
-ignore '**/.venv/**' \
-ignore '**/site-packages/**' \
-ignore '*/.venv/**' \
-ignore '**/.idea/**' \
-ignore '**/*.csv' \
-ignore '**/*.pyc' \
-ignore '**/*.txt' \
-ignore '**/*.xml' \
-ignore '**/*.yaml' \
-ignore '**/*.toml' \
-ignore '**/*lock.hcl' \
-ignore '**/*pb2*' \
-ignore '**/vendor/**' \
.
# Check go.mod files for proper replace directives
.PHONY: gomod-lint
gomod-lint: ## Validate go.mod files for local module replace directives
@echo "Validating go.mod files for local module replace directives..."
./scripts/validate-gomod.sh
# Sync dependencies across all go modules using Go workspace
.PHONY: dependencies-sync
dependencies-sync: dependencies-update go-mod-tidy-all ## Sync dependencies across all Go modules using workspace
@echo "go.mod and go.sum updated and synced successfully across all go modules"
# Update dependencies across all go modules using Go workspace
.PHONY: dependencies-update
dependencies-update:
@echo "Updating dependencies across all Go modules..."
rm go.work >/dev/null 2>&1 || true
find . -name "go.mod" | awk -F/go.mod '{print $$1}' | xargs go work init
go work sync
rm go.work go.work.sum >/dev/null 2>&1 || true
@echo "Dependencies updated successfully"
# Sync dependencies and lint to ensure no files were modified
.PHONY: dependencies-sync-lint
dependencies-sync-lint: dependencies-sync ## Sync dependencies and verify no files were modified
@echo "Checking if dependency sync modified any files..."
git status --porcelain --untracked-files=no
git --no-pager diff
@echo "Verifying that dependency sync didn't modify any files..."
test -z "$$(git status --porcelain --untracked-files=no)"
# Run go mod tidy in all directories with go.mod files
.PHONY: go-mod-tidy-all
go-mod-tidy-all: ## Run go mod tidy in all directories with go.mod files
@echo "Running go mod tidy in all directories with go.mod files..."
@find . -name "go.mod" -type f | while read -r gomod_file; do \
dir=$$(dirname "$$gomod_file"); \
echo "Running go mod tidy in $$dir..."; \
(cd "$$dir" && go mod tidy) || exit 1; \
done
@echo "go mod tidy completed in all modules"
# Lint and test non-health-monitor Go modules
.PHONY: go-lint-test-all
go-lint-test-all:
@echo "Running lint and tests for non-health-monitor Go modules..."
@for module in $(shell echo "$(GO_MODULES)" | tr ' ' '\n' | grep -v health-monitors); do \
echo "Processing $$module..."; \
$(MAKE) lint-test-$$module || exit 1; \
done
# Lint and test non-health-monitor Python modules
.PHONY: python-lint-test-all
python-lint-test-all:
@echo "Running lint and tests for non-health-monitor Python modules..."
@for module in $(shell echo "$(PYTHON_MODULES)" | tr ' ' '\n' | grep -v health-monitors); do \
echo "Processing $$module..."; \
$(MAKE) lint-test-$$module || exit 1; \
done
# Individual non-health-monitor Go module lint-test targets
.PHONY: lint-test-api
lint-test-api:
@echo "Linting and testing api (using standardized Makefile)..."
$(MAKE) -C api lint-test
.PHONY: lint-test-platform-connectors
lint-test-platform-connectors:
@echo "Linting and testing platform-connectors (using standardized Makefile)..."
$(MAKE) -C platform-connectors lint-test
.PHONY: lint-test-health-events-analyzer
lint-test-health-events-analyzer:
@echo "Linting and testing health-events-analyzer (using standardized Makefile)..."
$(MAKE) -C health-events-analyzer lint-test
.PHONY: lint-test-fault-quarantine
lint-test-fault-quarantine:
@echo "Linting and testing fault-quarantine (using standardized Makefile)..."
$(MAKE) -C fault-quarantine lint-test
.PHONY: lint-test-labeler
lint-test-labeler:
@echo "Linting and testing labeler (using standardized Makefile)..."
$(MAKE) -C labeler lint-test
.PHONY: lint-test-node-drainer
lint-test-node-drainer:
@echo "Linting and testing node-drainer (using standardized Makefile)..."
$(MAKE) -C node-drainer lint-test
.PHONY: lint-test-fault-remediation
lint-test-fault-remediation:
@echo "Linting and testing fault-remediation (using standardized Makefile)..."
$(MAKE) -C fault-remediation lint-test
.PHONY: lint-test-janitor
lint-test-janitor:
@echo "Linting and testing janitor (using standardized Makefile)..."
$(MAKE) -C janitor lint-test
.PHONY: lint-test-store-client
lint-test-store-client:
@echo "Linting and testing store-client..."
$(MAKE) -C store-client lint-test
.PHONY: lint-test-commons
lint-test-commons:
@echo "Linting and testing commons..."
$(MAKE) -C commons lint-test
.PHONY: lint-test-metadata-collector
lint-test-metadata-collector:
@echo "Linting and testing metadata-collector..."
$(MAKE) -C metadata-collector lint-test
.PHONY: lint-test-event-exporter
lint-test-event-exporter:
@echo "Linting and testing event-exporter..."
$(MAKE) -C event-exporter lint-test
# Python module lint-test targets (non-health-monitors)
# Currently no non-health-monitor Python modules
# Kubernetes distro lint (delegate to distros/kubernetes/Makefile)
.PHONY: kubernetes-distro-lint
kubernetes-distro-lint:
@echo "Linting Kubernetes distribution..."
$(MAKE) -C distros/kubernetes lint
# Helm chart validation
.PHONY: helm-lint
helm-lint:
@echo "🎯 Validating Helm charts..."
@# Ensure helm is available
@if ! command -v helm >/dev/null 2>&1; then \
echo "❌ Error: helm command not found. Please install Helm first."; \
exit 1; \
fi
@echo "Using Helm version: $$(helm version --short)"
@echo ""
@# Main nvsentinel chart
@echo "Validating main nvsentinel chart..."
helm lint distros/kubernetes/nvsentinel/
@echo ""
@# Individual component charts
@echo "Validating component charts..."
@for chart_dir in distros/kubernetes/nvsentinel/charts/*/; do \
if [[ -f "$$chart_dir/Chart.yaml" ]]; then \
chart_name=$$(basename "$$chart_dir"); \
echo "Validating chart: $$chart_name"; \
helm lint "$$chart_dir" -f distros/kubernetes/nvsentinel/values.yaml || exit 1; \
echo "Testing template rendering for: $$chart_name"; \
helm template "$$chart_name" "$$chart_dir" -f distros/kubernetes/nvsentinel/values.yaml >/dev/null || exit 1; \
echo ""; \
fi; \
done
@echo "✅ All Helm charts validated successfully"
# Helm chart unit tests
.PHONY: helm-test
helm-test:
@echo "🧪 Running Helm chart unit tests..."
@if ! helm plugin list | grep -q unittest; then \
echo "❌ Error: helm-unittest plugin not found. Install with: helm plugin install https://github.com/helm-unittest/helm-unittest"; \
exit 1; \
fi
@for chart_dir in distros/kubernetes/nvsentinel/charts/*/; do \
if [[ -d "$$chart_dir/tests" ]]; then \
chart_name=$$(basename "$$chart_dir"); \
echo "Running unit tests for chart: $$chart_name"; \
helm unittest "$$chart_dir" || exit 1; \
echo ""; \
fi; \
done
@echo "✅ All Helm chart unit tests passed"
# Log collector lint (shell script)
.PHONY: log-collector-lint
log-collector-lint: ## Lint shell scripts in log collector
@echo "Linting log collector shell scripts..."
$(MAKE) -C log-collector lint
# GPU reset lint (shell script)
.PHONY: gpu-reset-lint
gpu-reset-lint: ## Lint shell scripts in GPU reset
@echo "Linting GPU reset shell scripts..."
$(MAKE) -C gpu-reset lint
# Build targets (delegate to sub-Makefiles for better organization)
.PHONY: build-all
build-all: build-health-monitors build-main-modules ## Build all modules
# Build health monitors (delegate to health-monitors/Makefile)
.PHONY: build-health-monitors
build-health-monitors:
@echo "Building all health monitors..."
$(MAKE) -C health-monitors build-all
# Build non-health-monitor Go modules
.PHONY: build-main-modules
build-main-modules:
@echo "Building non-health-monitor Go modules..."
@for module in $(shell echo "$(GO_MODULES)" | tr ' ' '\n' | grep -v health-monitors); do \
echo "Building $$module..."; \
cd $$module && $(GO) build ./... && cd ..; \
done
# Individual build targets for non-health-monitor modules
define make-build-target
.PHONY: build-$(1)
build-$(1):
@echo "Building $(1)..."
cd $(1) && $(GO) build ./...
endef
$(foreach module,$(shell echo "$(GO_MODULES)" | tr ' ' '\n' | grep -v health-monitors),$(eval $(call make-build-target,$(module))))
# Health monitor build targets (delegate to health-monitors/Makefile)
.PHONY: build-syslog-health-monitor
build-syslog-health-monitor:
$(MAKE) -C health-monitors build-syslog-health-monitor
.PHONY: build-csp-health-monitor
build-csp-health-monitor:
$(MAKE) -C health-monitors build-csp-health-monitor
.PHONY: build-gpu-health-monitor
build-gpu-health-monitor:
$(MAKE) -C health-monitors build-gpu-health-monitor
# Clean targets (delegate to sub-Makefiles for better organization)
.PHONY: clean-all
clean-all: clean-health-monitors clean-main-modules ## Clean all modules
# Clean health monitors (delegate to health-monitors/Makefile)
.PHONY: clean-health-monitors
clean-health-monitors:
@echo "Cleaning all health monitors..."
$(MAKE) -C health-monitors clean-all
# Clean non-health-monitor Go modules
.PHONY: clean-main-modules
clean-main-modules:
@echo "Cleaning non-health-monitor Go modules..."
@for module in $(shell echo "$(GO_MODULES)" | tr ' ' '\n' | grep -v health-monitors); do \
echo "Cleaning $$module..."; \
$(MAKE) -C $$module clean || exit 1; \
done
# Docker targets (delegate to docker/Makefile) - standardized build system
.PHONY: docker-all
docker-all: ## Build all Docker images
@echo "Building all Docker images..."
$(MAKE) -C docker build-all
.PHONY: docker-publish-all
docker-publish-all: ## Build and publish all Docker images
@echo "Building and publishing all Docker images..."
$(MAKE) -C docker publish-all
.PHONY: docker-setup-buildx
docker-setup-buildx: ## Setup Docker buildx builder
$(MAKE) -C docker setup-buildx
# Ko targets - build Go container images without Docker
.PHONY: ko-build
ko-build: ## Build all ko-based container images locally
@echo "Building all ko-based container images..."
@./scripts/buildko.sh
.PHONY: ko-publish
ko-publish: ## Build and publish all ko-based container images
@echo "Building and publishing all ko-based container images..."
@./scripts/buildko.sh
# GPU health monitor Docker targets (special cases with DCGM versions)
.PHONY: docker-gpu-health-monitor-dcgm3
docker-gpu-health-monitor-dcgm3:
$(MAKE) -C docker build-gpu-health-monitor-dcgm3
.PHONY: docker-gpu-health-monitor-dcgm4
docker-gpu-health-monitor-dcgm4:
$(MAKE) -C docker build-gpu-health-monitor-dcgm4
.PHONY: docker-gpu-health-monitor
docker-gpu-health-monitor:
$(MAKE) -C docker build-gpu-health-monitor
# Individual module Docker targets
.PHONY: docker-syslog-health-monitor
docker-syslog-health-monitor:
$(MAKE) -C docker build-syslog-health-monitor
.PHONY: docker-csp-health-monitor
docker-csp-health-monitor:
$(MAKE) -C docker build-csp-health-monitor
.PHONY: docker-platform-connectors
docker-platform-connectors:
$(MAKE) -C docker build-platform-connectors
.PHONY: docker-health-events-analyzer
docker-health-events-analyzer:
$(MAKE) -C docker build-health-events-analyzer
.PHONY: docker-fault-quarantine
docker-fault-quarantine:
$(MAKE) -C docker build-fault-quarantine
.PHONY: docker-labeler
docker-labeler:
$(MAKE) -C docker build-labeler
.PHONY: docker-node-drainer
docker-node-drainer:
$(MAKE) -C docker build-node-drainer
.PHONY: docker-fault-remediation
docker-fault-remediation:
$(MAKE) -C docker build-fault-remediation
.PHONY: docker-janitor
docker-janitor:
$(MAKE) -C docker build-janitor
.PHONY: docker-log-collector
docker-log-collector:
$(MAKE) -C docker build-log-collector
# Health monitors group
.PHONY: docker-health-monitors
docker-health-monitors:
$(MAKE) -C docker build-health-monitors
# Main modules group (non-health-monitors)
.PHONY: docker-main-modules
docker-main-modules:
$(MAKE) -C docker build-main-modules
# ============================================================================
# Development Environment Targets
# ============================================================================
# Configuration variables
CTLPTL_CONFIG_FILE ?= .ctlptl.yaml
CLUSTER_NAME ?= kind-nvsentinel
REGISTRY_NAME ?= ctlptl-registry
REGISTRY_PORT ?= 5001
# Tilt development targets
.PHONY: tilt-up
tilt-up: ## Start Tilt development environment
@echo "Starting Tilt development environment..."
@if ! command -v tilt >/dev/null 2>&1; then \
echo "Error: tilt is not installed. Please install from https://tilt.dev/"; \
exit 1; \
fi
tilt up -f tilt/Tiltfile
.PHONY: tilt-down
tilt-down: ## Stop Tilt development environment
@echo "Stopping Tilt development environment..."
@if command -v tilt >/dev/null 2>&1; then \
tilt down -f tilt/Tiltfile; \
else \
echo "Warning: tilt is not installed"; \
fi
.PHONY: tilt-ci
tilt-ci: ## Run Tilt in CI mode (no UI, waits for all resources)
@echo "Running Tilt in CI mode (no UI)..."
@if ! command -v tilt >/dev/null 2>&1; then \
echo "Error: tilt is not installed. Please install from https://tilt.dev/"; \
exit 1; \
fi
@echo "Starting Tilt with SKIP_KWOK_NODES_IN_TILT=1 (with retry logic)..."
@for i in 1 2 3; do \
echo "Attempt $$i of 3..."; \
if SKIP_KWOK_NODES_IN_TILT=1 tilt ci -f tilt/Tiltfile --timeout=10m; then \
echo "Tilt CI succeeded on attempt $$i"; \
break; \
else \
if [ $$i -lt 3 ]; then \
echo "Tilt CI failed on attempt $$i, retrying in 10 seconds..."; \
sleep 10; \
else \
echo "Tilt CI failed after 3 attempts"; \
exit 1; \
fi; \
fi; \
done
@echo "Creating KWOK nodes"
@bash tilt/create-kwok-nodes.sh
@echo "Waiting for all deployments to be ready..."
@kubectl get deployments --all-namespaces --no-headers -o custom-columns=":metadata.namespace,:metadata.name" | while read ns name; do \
echo "Waiting for deployment $$name in namespace $$ns..."; \
kubectl rollout status deployment/$$name -n $$ns --timeout=300s || exit 1; \
done
@echo "Waiting for all daemonsets to be ready..."
@kubectl get daemonsets --all-namespaces --no-headers -o custom-columns=":metadata.namespace,:metadata.name" | while read ns name; do \
echo "Waiting for daemonset $$name in namespace $$ns..."; \
kubectl rollout status daemonset/$$name -n $$ns --timeout=300s || exit 1; \
done
@echo "Waiting for all statefulsets to be ready..."
@kubectl get statefulsets --all-namespaces --no-headers -o custom-columns=":metadata.namespace,:metadata.name" | while read ns name; do \
echo "Waiting for statefulset $$name in namespace $$ns..."; \
if kubectl rollout status statefulset/$$name -n $$ns --timeout=300s 2>/dev/null; then \
echo "StatefulSet $$name rolled out successfully"; \
else \
echo "Rollout status not available, checking ready replicas..."; \
desired=$$(kubectl get statefulset $$name -n $$ns -o jsonpath='{.spec.replicas}' 2>/dev/null); \
if [ -z "$$desired" ]; then \
echo "Failed to get replica count for statefulset $$name"; \
exit 1; \
fi; \
timeout=300; elapsed=0; \
while [ $$elapsed -lt $$timeout ]; do \
ready=$$(kubectl get statefulset $$name -n $$ns -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0"); \
if [ "$$ready" = "$$desired" ]; then \
echo "StatefulSet $$name is ready ($$ready/$$desired replicas)"; \
break; \
fi; \
if [ $$elapsed -eq 0 ] || [ $$((elapsed % 30)) -eq 0 ]; then \
echo "Waiting for $$name: $$ready/$$desired replicas ready..."; \
fi; \
sleep 5; elapsed=$$((elapsed + 5)); \
done; \
if [ $$elapsed -ge $$timeout ]; then \
echo "Timeout waiting for statefulset $$name"; \
exit 1; \
fi; \
fi; \
done
@echo "All workloads are ready!"
# ctlptl cluster configuration
.PHONY: cluster-create
cluster-create: ## Create local ctlptl-managed Kind cluster with registry
@echo "Creating local development cluster with ctlptl..."
@if ! command -v ctlptl >/dev/null 2>&1; then \
echo "Error: ctlptl is not installed. Please install from https://github.com/tilt-dev/ctlptl"; \
echo "Installation options:"; \
echo " - Homebrew: brew install tilt-dev/tap/ctlptl"; \
echo " - Go: go install github.com/tilt-dev/ctlptl/cmd/ctlptl@latest"; \
exit 1; \
fi
@if ! command -v docker >/dev/null 2>&1; then \
echo "Error: docker is not installed. Please install Docker"; \
exit 1; \
fi
@if ! command -v kind >/dev/null 2>&1; then \
echo "Error: kind is not installed. Please install from https://kind.sigs.k8s.io/"; \
exit 1; \
fi
@echo "Creating cluster and registry with ctlptl..."
@if [ ! -f "$(CTLPTL_CONFIG_FILE)" ]; then \
echo "Error: ctlptl config file $(CTLPTL_CONFIG_FILE) not found"; \
exit 1; \
fi
ctlptl apply -f $(CTLPTL_CONFIG_FILE)
@echo "Waiting for all nodes to be ready..."
@kubectl wait --for=condition=ready nodes --all --timeout=300s
@echo "Cluster created successfully!"
@echo "Registry available at localhost:$(REGISTRY_PORT)"
.PHONY: cluster-delete
cluster-delete: ## Delete local ctlptl-managed cluster and registry
@echo "Deleting local development cluster..."
@if [ ! -f "$(CTLPTL_CONFIG_FILE)" ]; then \
echo "Error: ctlptl config file $(CTLPTL_CONFIG_FILE) not found"; \
exit 1; \
fi
ctlptl delete -f $(CTLPTL_CONFIG_FILE) || echo "Resources from $(CTLPTL_CONFIG_FILE) not found"
.PHONY: cluster-status
cluster-status: ## Show cluster and registry status
@echo "=== Cluster Status ==="
@if command -v ctlptl >/dev/null 2>&1; then \
echo "ctlptl clusters:"; \
ctlptl get clusters 2>/dev/null || echo "No ctlptl clusters found"; \
echo ""; \
fi
@if command -v kubectl >/dev/null 2>&1 && kubectl cluster-info >/dev/null 2>&1; then \
echo "Current kubectl context: $$(kubectl config current-context)"; \
echo ""; \
echo "Cluster nodes:"; \
kubectl get nodes -o wide 2>/dev/null || echo "Unable to get nodes"; \
echo ""; \
echo "Registry status:"; \
docker ps --filter "name=$(REGISTRY_NAME)" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}" 2>/dev/null || echo "Registry container not found"; \
else \
echo "No active cluster (kubectl not configured or cluster not accessible)"; \
fi
@echo "======================"
# Combined development environment targets
.PHONY: dev-env
dev-env: cluster-create tilt-up ## Create cluster and start Tilt (full development setup)
.PHONY: dev-env-clean
dev-env-clean: tilt-down cluster-delete ## Stop Tilt and delete cluster (full cleanup)
# Quick development workflow targets
.PHONY: dev-restart
dev-restart: tilt-down tilt-up ## Restart Tilt without recreating cluster
.PHONY: dev-reset
dev-reset: dev-env-clean dev-env ## Full reset (tear down and recreate everything)
# Tilt end-to-end test target for CI
.PHONY: e2e-test-ci
e2e-test-ci: tilt-ci ## Run end-to-end test suite in CI mode
$(MAKE) -C tests test-ci
# Tilt end-to-end test target
.PHONY: e2e-test
e2e-test:
$(MAKE) -C dev tilt-up
$(MAKE) -C tests test
# Kubernetes Helm targets (delegate to distros/kubernetes/Makefile)
.PHONY: kubernetes-distro-helm-publish
kubernetes-distro-helm-publish:
$(MAKE) -C distros/kubernetes helm-publish
# Individual Docker build targets (delegate to docker/Makefile)
# Use: make -C docker build-<module-name>
# Utility targets
.PHONY: list-modules
list-modules: ## List all modules (Go, Python, container-only)
@echo "Go modules:"
@for module in $(GO_MODULES); do echo " $$module"; done
@echo "Python modules:"
@for module in $(PYTHON_MODULES); do echo " $$module"; done
@echo "Container-only modules:"
@for module in $(CONTAINER_MODULES); do echo " $$module"; done
.PHONY: help
help: ## Display available make targets
@echo "nvsentinel Main Makefile"
@echo ""
@echo "Available targets:"
@grep -hE '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort -u | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-30s\033[0m %s\n", $$1, $$2}'
@echo ""
@echo "Configuration variables:"
@echo " GO_VERSION=$(GO_VERSION), GOLANGCI_LINT_VERSION=$(GOLANGCI_LINT_VERSION)"
@echo " CTLPTL_CONFIG_FILE=$(CTLPTL_CONFIG_FILE), REGISTRY_PORT=$(REGISTRY_PORT)"
@echo ""
@echo "Sub-Makefiles: health-monitors/, docker/, distros/kubernetes/, tests/"
#==============================================================================
# PostgreSQL Schema Management
#==============================================================================
.PHONY: validate-postgres-schema
validate-postgres-schema: ## Validate PostgreSQL schema consistency between docs and Helm values
@./scripts/validate-postgres-schema.sh
.PHONY: update-helm-postgres-schema
update-helm-postgres-schema: ## Update Helm values file with schema from docs/postgresql-schema.sql
@./scripts/update-helm-postgres-schema.sh