easyai/scripts/install_easyai_server.sh at develop · solariun/easyai · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env bash
# ============================================================================
# easyai installer — replaces install_llama_server.sh
#
# What this script does:
#   1. Installs build deps (CMake/Ninja/git/pkg-config + libcurl) and the
#      backend SDK that matches your hardware (Vulkan / CUDA / ROCm-HIP).
#   2. Clones llama.cpp + easyai (or uses existing sibling dirs).
#   3. Builds easyai (libeasyai + easyai-local + easyai-cli + easyai-server
#      + easyai-agent + easyai-recipes + easyai-chat) with the selected
#      GPU backend.
#   4. Installs the binaries to $prefix/bin.
#   5. Creates a system user, /var/lib/easyai/{models,workspace}, and
#      /etc/easyai/{easyai.ini, system.txt_template, api_key} —
#      out-of-the-box uses the binary's built-in "Deep" prompt; copy
#      system.txt_template to system.txt to activate a custom persona.
#   6. Drops a hardened systemd unit that runs easyai-server with mlock,
#      flash-attn, q8_0 KV cache, Bearer auth, Prometheus /metrics, and
#      coredump capture (LimitCORE=infinity + systemd-coredump package).
#   7. (Linux only, optional) AMD-iGPU GTT kernel cmdline tweak; mDNS via
#      avahi; memlock+nofile limits; system swap off.
#
# What this script does NOT need to do (vs the old llama-server installer):
#   - No transparent proxy: easyai's HTTP layer already does the OpenAI-
#     compatible /v1/chat/completions itself.
#   - No SearXNG: web_search is a built-in tool that scrapes DuckDuckGo
#     directly via libcurl.
#   - No MCP bridge: tools live inside easyai-server and are auto-registered.
#   - No webui rebrand: the webui is a self-contained file embedded in the
#     binary (--webui-title would have nothing to patch).
#
# Linux / Debian target. The build itself works anywhere; this *installer*
# uses apt-get + systemd. macOS users: see the project README for the manual
# build matrix.
#
# Usage:
#   ./install_easyai_server.sh                       # full setup
#   ./install_easyai_server.sh --model /path/to.gguf # required for first run
#   ./install_easyai_server.sh --backend vulkan      # force backend
#   ./install_easyai_server.sh --service-port 8080
#   ./install_easyai_server.sh --service-host 0.0.0.0
#   ./install_easyai_server.sh --mdns-hostname my-ai # box becomes my-ai.local
#                                                    # default: current system
#                                                    # hostname (so <host>.local
#                                                    # — e.g. ai-pro → ai-pro.local)
#                                                    # ignored under --no-avahi
#   ./install_easyai_server.sh --ctx-size 32768   # default 262144 (256 K)
#   ./install_easyai_server.sh --ngl 99            # GPU layers (-1=auto, 0=CPU)
#   ./install_easyai_server.sh --no-mlock --use-mmap
#   ./install_easyai_server.sh --temperature 0.2 --top-k 50 --min-p 0.03
#   ./install_easyai_server.sh --repeat-penalty 1.04 --frequency-penalty 0.05
#   ./install_easyai_server.sh --rope-scaling yarn --rope-scale 2 --yarn-orig-ctx 131072
#   ./install_easyai_server.sh --split-mode none       # none|layer|row|tensor
#   ./install_easyai_server.sh --http-timeout 86400   # default 24h, matches cli
#   ./install_easyai_server.sh --webui-title "AI Box"
#   ./install_easyai_server.sh --webui-icon /path/to/logo.svg   # ico|png|svg|gif|jpg|webp
#   ./install_easyai_server.sh --upgrade             # git pull + rebuild
#   ./install_easyai_server.sh --force               # CLEAN-SLATE rewrite:
#                                                    #   - easyai.ini backed up
#                                                    #     to .bak then replaced
#                                                    #   - systemd unit stopped,
#                                                    #     disabled, removed
#                                                    #   - ENTIRE drop-in dir
#                                                    #     (.service.d/*) wiped
#                                                    #     incl. operator-edited
#                                                    #     override.conf
#                                                    #   - systemctl reset-failed
#                                                    #     so a fresh unit isn't
#                                                    #     blocked by prior
#                                                    #     StartLimitBurst gate
#                                                    #   - new unit written
#                                                    #   - daemon-reload
#                                                    # Use this after changing
#                                                    # installer defaults OR to
#                                                    # recover a corrupted unit.
#                                                    # system.txt is NOT touched
#                                                    # (only the template ships).
#   ./install_easyai_server.sh --enable-now          # systemctl start now
#   ./install_easyai_server.sh --enable-verbose      # bake --verbose into ExecStart (noisy)
#   ./install_easyai_server.sh --no-llama-tools      # skip the llama.cpp tool
#                                                    # binaries (llama-cli,
#                                                    # llama-server,
#                                                    # llama-gguf-split,
#                                                    # llama-quantize,
#                                                    # llama-bench, ...).
#                                                    # default: they are
#                                                    # built and installed to
#                                                    # $prefix/bin alongside
#                                                    # easyai-server.
#   ./install_easyai_server.sh --mtp                 # bake --spec-type draft-mtp
#                                                    # --spec-draft-n-max 6 into
#                                                    # ExecStart. Only for MTP-
#                                                    # trained models (DeepSeek
#                                                    # V3, MimoVL, etc.); plain
#                                                    # models won't load.
#   ./install_easyai_server.sh --mtp --mtp-n-max 8   # override the draft window
#   ./install_easyai_server.sh --no-lemonade         # skip the Lemonade Server
#                                                    # install. Default: install
#                                                    # Lemonade Server (AMD's
#                                                    # NPU-capable LLM runtime)
#                                                    # alongside easyai-server,
#                                                    # but leave its systemd
#                                                    # unit DISABLED — operator
#                                                    # starts it manually via
#                                                    # `lemonade-server serve
#                                                    # --no-tray`. Default port
#                                                    # 13305. Ubuntu-only path
#                                                    # (PPA); on Debian we warn
#                                                    # and skip with a pointer
#                                                    # to the manual build.
#   ./install_easyai_server.sh --no-rocm-install     # skip auto-install of the
#                                                    # ROCm SDK when --backend
#                                                    # hip is used. Default:
#                                                    # ON for AMD-CPU boxes —
#                                                    # the installer adds the
#                                                    # repo.radeon.com APT repo
#                                                    # (noble pinned on non-LTS
#                                                    # Ubuntu like 25.10), then
#                                                    # apt-installs the minimal
#                                                    # HIP set (rocm-hip-runtime
#                                                    # + rocm-hip-sdk + rocblas
#                                                    # + hipblas + rocm-device-
#                                                    # libs, ~5 GB, NOT the full
#                                                    # ~25 GB ROCm meta-pkg).
#                                                    # /etc/profile.d/rocm.sh
#                                                    # is dropped so /opt/rocm/
#                                                    # bin is on PATH for every
#                                                    # shell. AMDGPU_TARGETS is
#                                                    # auto-detected from
#                                                    # rocminfo and passed to
#                                                    # cmake (e.g. gfx1151 on
#                                                    # Strix Point / 890M).
#                                                    # Skipped on non-AMD CPU,
#                                                    # non-hip backend, non-
#                                                    # Ubuntu distro, or if
#                                                    # hipcc is already on PATH.
#   ./install_easyai_server.sh --rocm-version 6.4    # override ROCm repo version
#                                                    # (default 6.4). Bump when
#                                                    # newer ROCm lands or pin
#                                                    # to an older series for
#                                                    # reproducibility.
#   ./install_easyai_server.sh --no-tdp-unlock       # skip the Ryzen TDP unlock.
#                                                    # Default: ON for AMD Ryzen
#                                                    # CPUs — the installer
#                                                    # builds ryzenadj from
#                                                    # source (no apt package
#                                                    # exists), drops a systemd
#                                                    # oneshot + a 60 s timer
#                                                    # under easyai-tdp.{service,
#                                                    # timer}, and reapplies
#                                                    # STAPM/PPT/SLOW/FAST = 54 W
#                                                    # plus Tctl = 95 °C on every
#                                                    # boot AND every 60 s
#                                                    # (ryzenadj limits drift back
#                                                    # after C6/sleep on some
#                                                    # platforms — the timer
#                                                    # keeps them pinned). The
#                                                    # 54 W cap matches the
#                                                    # HX 370's spec limit;
#                                                    # check `sensors` under
#                                                    # load and back off via
#                                                    # --tdp-watts if the box
#                                                    # throttles past 95 °C.
#                                                    # Skipped on non-AMD CPUs.
#   ./install_easyai_server.sh --tdp-watts 45        # cap STAPM/PPT/SLOW/FAST
#                                                    # in watts (default 54).
#                                                    # ryzenadj uses mW under
#                                                    # the hood — we multiply
#                                                    # by 1000 for you.
#   ./install_easyai_server.sh --tdp-tctl 90         # Tctl junction temp cap
#                                                    # in °C (default 95). Lower
#                                                    # values throttle sooner;
#                                                    # raise only if cooling is
#                                                    # confirmed adequate.
#   ./install_easyai_server.sh --no-service          # build/install only
#   ./install_easyai_server.sh -h                    # show this help
# ============================================================================

# Strict mode: stop on an unhandled command failure (errexit), treat the
# expansion of an unset variable as an error (nounset), and let a pipeline
# fail when any of its stages fails (pipefail).
set -o errexit -o nounset -o pipefail

# ---------- defaults --------------------------------------------------------
# Where the sources are checked out. src_root honours a value already present
# in the environment; the two checkout directories are derived from it. The
# option parser can replace all three (--src-root, --easyai-dir, --llama-dir).
src_root="${src_root:-$HOME/opt}"
easyai_dir="$src_root/easyai"
llama_dir="$src_root/llama.cpp"
# Clone URLs, each overridable through the environment or through
# --easyai-repo / --llama-repo. The easyai project is hosted at
# github.com/solariun/easyai, so the default must name that repository.
easyai_repo="${easyai_repo:-https://github.com/solariun/easyai.git}"
llama_repo="${llama_repo:-https://github.com/ggml-org/llama.cpp.git}"
easyai_ref=""                                 # empty = main; pass --ref <sha|tag>
llama_ref=""                                  # empty = main; pass --llama-ref

# Installation prefix (override: --prefix).
install_prefix=/usr
# GPU backend selector, one of auto|vulkan|cuda|hip|cpu (override: --backend).
backend=auto
# AMD iGPU GTT setting, only used by RDNA2/iGPU (override: --gtt).
gtt_gb=29
# Parallel build jobs: whatever nproc reports, or 4 when nproc cannot be run
# (override: -j / --jobs).
jobs=$(nproc 2>/dev/null || echo 4)

# Step switches. 1 = perform the step, 0 = skip it. Where the option parser
# exposes a flag for a switch, the flag is named next to it.
do_install=1            # apt-get install deps (--no-install)
do_build=1              # --no-build
do_groups=1             # add user to render+video (--no-groups)
do_limits=1             # /etc/security/limits.d for mlock (--no-limits)
# Swap policy: "off", "tune", or empty to keep the system's swap untouched
# (--no-swap / --swap-tune / --keep-swap).
do_swap=off
do_kernel=1             # AMD iGPU GTT cmdline (--no-kernel)
do_service=1            # --no-service
do_force_service=0      # --force-service
# --force is a superset of --force-service: on top of it, /etc/easyai/easyai.ini
# is rewritten even when the file already exists. Use it after changing
# installer defaults to propagate them to the box. system.txt is not affected,
# because the active system.txt is no longer installed (only the template is).
do_force=0
do_enable_now=0         # --enable-now / --no-enable
do_avahi=1              # --no-avahi
do_presets=1            # symlink easyai-cli → /usr/bin/ai (--no-presets)
do_model=1              # --no-model
do_upgrade=0            # --upgrade
copy_model=0

# Lemonade Server (AMD's NPU-capable LLM runtime). When 1 it is installed from
# the lemonade-team PPA on Ubuntu; on non-Ubuntu systems (Debian etc.) the step
# is skipped with a pointer to the upstream manual build. The systemd unit the
# PPA ships is disabled and stopped after the install, so Lemonade only runs
# when the operator starts it by hand (an explicit user request: manual NPU
# experimentation, no auto-start). The port is upstream's default, 13305, and
# is not configurable from this script. --no-lemonade sets this to 0.
do_lemonade=1

# Automatic install of a minimal ROCm SDK (rocm-hip-runtime, rocm-hip-sdk,
# rocblas, hipblas, rocm-device-libs) when the CPU is AMD and the hip backend
# ends up selected. Adds the repo.radeon.com APT repo, pinned to noble on
# non-LTS Ubuntu such as 25.10 because AMD only publishes for LTS. This turns
# the installer's former "assuming rocm-dev installed" warning into a real
# install. --no-rocm-install sets this to 0.
do_rocm_install=1
# ROCm series used in the APT repo URL (https://repo.radeon.com/rocm/apt/<ver>).
# Bump as new releases land, or override with --rocm-version.
rocm_version=6.4

# Automatic TDP unlock on AMD Ryzen. ryzenadj is built from source (there is no
# apt package) and installed to /usr/local/bin, together with a oneshot systemd
# unit and a 60 s timer that reapply the limits on every boot and every 60 s —
# ryzenadj values can drift back after C6/sleep on some platforms, and the
# timer keeps them pinned. Skipped on non-AMD CPUs. --no-tdp-unlock sets this
# to 0.
do_tdp_unlock=1
# STAPM / PPT-SLOW / PPT-FAST cap in watts (HX 370 spec limit); converted to mW
# for ryzenadj. Override with --tdp-watts.
tdp_watts=54
# Tctl junction temperature cap in °C. 95 is the chip maximum; a lower value
# throttles earlier. Override with --tdp-tctl.
tdp_tctl=95

# Also build and install llama.cpp's CLI tools (llama-cli, llama-server,
# llama-gguf-split, llama-quantize, llama-bench, llama-tokenize, llama-imatrix,
# llama-perplexity, ...) so the AI box has the full llama.cpp tool surface next
# to easyai-server. --no-llama-tools sets this to 0, which gives a faster
# rebuild when iterating on easyai itself.
do_llama_tools=1

# Runtime configuration of easyai-server; these values are compiled into the
# unit file.
service_user=easyai
service_group=easyai
service_home=/var/lib/easyai
service_model_dir="${service_home}/models"
service_model_link=ai.gguf
service_workspace="${service_home}/workspace"
service_host=0.0.0.0            # override: --service-host
service_port=80                 # matches install_llama_server.sh default
service_alias=EasyAi
service_name=easyai-server.service

# mDNS / kernel hostname. The system hostname is renamed to this value so that
# avahi advertises the box as `<mdns_hostname>.local`. The default is the
# current short hostname, which means the box keeps the name the operator
# already gave it (host `ai-pro` → `ai-pro.local`, host `ai` → `ai.local`,
# host `xx` → `xx.local`). --mdns-hostname overrides it. With --no-avahi the
# whole step is skipped: the operator keeps the existing hostname and falls
# back to the LAN IP.
# `hostname -s` is tried first; plain `hostname` is the fallback when it fails.
mdns_hostname=$(hostname -s 2>/dev/null || hostname)

# Operator-facing configuration directory.
config_dir=/etc/easyai

# System prompt. With no system_file set in the INI, the binary's built-in
# "Deep" prompt is used. Only a documented TEMPLATE (`system.txt_template`) is
# installed; the active system.txt is NOT created, so a fresh install runs with
# the built-in default persona. To switch to a custom prompt:
#     sudo cp system.txt_template system.txt
#     sudo $EDITOR system.txt                # tweak as needed
#     # uncomment SERVER.system_file in /etc/easyai/easyai.ini
#     sudo systemctl restart easyai-server
system_template_file="${config_dir}/system.txt_template"
system_file="${config_dir}/system.txt"   # NOT created by default; path used by INI hint only

api_key_file="${config_dir}/api_key"

# Directory always handed to the server via --external-tools. Being empty is a
# normal state (no extra tools); operators drop EASYAI-*.tools files in here
# without having to touch the systemd unit.
external_tools_dir="${config_dir}/external-tools"

# Central INI config: auth for /mcp ([MCP_USER]) plus sections reserved for
# future server-wide settings. Always passed to the server via --config; when
# the file is missing, MCP is open (the default on fresh installs).
ini_file="${config_dir}/easyai.ini"

# RAG: the agent's persistent registry / long-term memory. The AGENT writes
# here at runtime, so per FHS it belongs under /var/lib (mutable,
# agent-generated state) and not under /etc (operator config).
rag_dir=/var/lib/easyai/rag

# Context window: 256 K, which long agentic flows, deep research and large
# codebases need. It is paired with --rope-scaling yarn + --rope-scale 2 +
# --yarn-orig-ctx 131072 so that models trained at 128 K can be extended.
# Override with --ctx-size.
ctx_size=262144

# GPU layers: 99 forces every layer onto the GPU. The research/coding agent
# workload assumes a GPU with enough VRAM for the whole model. Use --ngl -1
# for auto-fit or --ngl 0 for CPU-only.
ngl=99

# Web UI branding.
webui_title=EasyAi      # --webui-title <text>
webui_icon=''           # --webui-icon <path/to/.ico|.png|.svg|.gif|.jpg|.webp>
webui_icon_dest="${config_dir}/favicon"   # final installed path under /etc/easyai

# Threads: 8 is the sweet spot for Strix Point / Ryzen AI 9 HX 370 with most
# layers on the iGPU. The production AI box was originally hardcoded at 16, but
# llama-bench on the 890M with an 80B MoE at ngl=99 reproduced the same tps at
# 8 threads as at 16 — the extra 8 only contend with the sampling, KV
# management and network paths that still run on the CPU. Operators running
# dense models CPU-side (or without any GPU offload) should override to the
# physical core count via --threads / --threads-batch.
n_threads_default=8
n_threads_batch_default=8
# Written commented-out into the INI; the engine itself picks "precise" when no
# preset is set.
preset=precise
thinking=on
enable_metrics=1
enable_flash_attn=1
# Writes verbose=on into the INI; this is the operator's primary debug switch.
enable_verbose=1

# --mtp: bake --spec-type draft-mtp --spec-draft-n-max 6 into ExecStart. Only
# meaningful for MTP-trained models (DeepSeek V3, MimoVL, etc.).
mtp=0
# --mtp-n-max <n>: overrides the draft window when --mtp is on.
mtp_n_max=6

# KV cache types. K stays at q8_0 because attention scores need precision and
# quantizing K hurts more than quantizing V. V is q8_0 as well, symmetric with
# K: an asymmetric q8/q4 split would save ~25 % KV memory but loses quality on
# long agentic flows, and production tuning on the AI box settled on symmetric
# q8 for both halves.
cache_type_k=q8_0
cache_type_v=q8_0

# Pin the small CPU residue (embeddings, scratch); most weights are on the GPU.
mlock=1

# no_mmap=1 loads the GGUF into anonymous memory instead of a kernel mmap.
# Combined with mlock=1, every weight page is pinned in RAM with no file
# backing, which eliminates the kernel's hint-driven eviction during
# long-running agentic sessions on the production AI box. The price is a few GB
# of load-time RSS peak; in return latency is stable once warm. Set no_mmap=0
# to reclaim that peak on hosts where RAM is tight or the model is loaded from
# a slow disk.
no_mmap=1
# HTTP read+write timeout, applied to the listen socket AND to the MCP-client
# connection. 86400 (24 h) equals easyai-cli's default --timeout, so neither
# side cuts a multi-hour agentic session. Lower it on public-facing servers,
# where slow-loris resilience matters more than support for long thinking
# turns.
http_timeout=86400

# RoPE / YaRN context extension, required whenever ctx_size is larger than the
# model's native training context. "yarn" scaling with scale=2 and
# yarn_orig_ctx=131072 doubles a 128K-trained model to 256K.
rope_scaling=yarn
rope_freq_scale=2
yarn_orig_ctx=131072

# GPU split mode: none = single GPU, layer = split layers across GPUs (the
# llama.cpp default), row = tensor parallelism, tensor = full tensor
# parallelism. "none" is the right choice for single-GPU / iGPU systems.
split_mode=none
# Sampling defaults, written ACTIVE (not commented) into [ENGINE].
# They are the BASELINE: deliberately loose so that the engine behaves
# acceptably with any model out of the box. Per-model tuning belongs in
# [MODEL_<pattern>] sections, which take precedence when the loaded model name
# matches.
#   - presence_penalty=1.5 is the anti-loop safety net for generic MoE/thinking
#     models; the model-specific profiles (Qwen3-Coder-Next, Qwen3.6, DeepSeek)
#     lower it or set it to zero.
#   - max_tokens=12288 caps a single response turn (code rarely exceeds this;
#     runaway loops are caught earlier).
# Per-workload overrides: --temperature / --top-p / --top-k / --min-p /
# --presence-penalty / --repeat-penalty / --frequency-penalty.
temperature=1.0
top_p=0.95
top_k=20
min_p=0.0
repeat_penalty=1.0
presence_penalty=1.5
frequency_penalty=0.05
max_tokens=12288

# Leave empty to skip auth (open server).
api_key=''

# Required when --no-model is NOT passed.
model_src=''

# ---------- helpers ---------------------------------------------------------
# Defined ahead of argument parsing on purpose: the parser itself calls
# warn/die (compat flags, --mtp-n-max validation) and bash resolves function
# names at call time, so they must exist before the first flag is handled.
#   log  MSG...   green "[+]" progress line on stdout
#   warn MSG...   yellow "[!]" notice on stdout
#   die  MSG...   red "[x]" error on stderr, then exit 1
#   ask  PROMPT   y/N question; returns 0 only for a single "y" or "Y"
log()  { printf '\033[1;32m[+]\033[0m %s\n' "$*"; }
warn() { printf '\033[1;33m[!]\033[0m %s\n' "$*"; }
die()  { printf '\033[1;31m[x]\033[0m %s\n' "$*" >&2; exit 1; }
ask()  { local r; read -rp "    $* [y/N] " r; [[ "$r" =~ ^[Yy]$ ]]; }

# ---------- arg parsing -----------------------------------------------------
# parse_args ARGV...
#   Walks the command line and stores each recognised flag in the matching
#   global variable.  Exits 0 for --help / --list-tags, 1 (via die) for a
#   flag with a missing or invalid value, 2 for an unknown flag.
#   Content validation of most values (numeric shape, INI-injection
#   characters) happens later, after parsing.
parse_args() {
    while [[ $# -gt 0 ]]; do
        # Flags that consume a value: refuse to continue when the value is
        # absent.  Without this guard `shift 2` fails on a trailing flag and
        # the loop either spins forever or dies with an opaque
        # "unbound variable" depending on the shell options in effect.
        case "$1" in
            --src-root|--easyai-dir|--llama-dir|--easyai-repo|--llama-repo|\
            --ref|--llama-ref|--prefix|--backend|--gtt|-j|--jobs|\
            --rocm-version|--tdp-watts|--tdp-tctl|\
            --service-host|--service-port|--mdns-hostname|--alias|\
            --ctx-size|--ngl|--n-gpu-layers|--threads|--threads-batch|\
            --preset|--thinking|--cache-type-k|--cache-type-v|\
            --repeat-penalty|--presence-penalty|--frequency-penalty|\
            --temperature|--top-p|--top-k|--min-p|--max-tokens|\
            --http-timeout|--rope-scaling|--rope-scale|--yarn-orig-ctx|\
            --split-mode|--api-key|--model|--source-dir|\
            --webui-title|--webui-icon|--thinking-budget|\
            --draft-model|--draft-max|--draft-min|--mtp-n-max)
                [[ $# -ge 2 ]] || die "$1: missing value (run with --help for usage)"
                ;;
        esac

        case "$1" in
            --src-root)         src_root="$2"; easyai_dir="$src_root/easyai"; llama_dir="$src_root/llama.cpp"; shift 2 ;;
            --easyai-dir)       easyai_dir="$2"; shift 2 ;;
            --llama-dir)        llama_dir="$2"; shift 2 ;;
            --easyai-repo)      easyai_repo="$2"; shift 2 ;;
            --llama-repo)       llama_repo="$2"; shift 2 ;;
            --ref)              easyai_ref="$2"; shift 2 ;;
            --llama-ref)        llama_ref="$2"; shift 2 ;;
            --prefix)           install_prefix="$2"; shift 2 ;;
            --backend)          backend="$2"; shift 2 ;;
            --gtt)              gtt_gb="$2"; shift 2 ;;
            -j|--jobs)          jobs="$2"; shift 2 ;;
            --no-install)       do_install=0; shift ;;
            --no-build)         do_build=0; shift ;;
            --no-groups)        do_groups=0; shift ;;
            --no-limits)        do_limits=0; shift ;;
            --no-kernel)        do_kernel=0; shift ;;
            --no-service)       do_service=0; shift ;;
            --no-model)         do_model=0; shift ;;
            --no-avahi)         do_avahi=0; shift ;;
            --no-presets)       do_presets=0; shift ;;
            --no-llama-tools)   do_llama_tools=0; shift ;;
            --with-llama-tools) do_llama_tools=1; shift ;;
            --no-lemonade)      do_lemonade=0; shift ;;
            --with-lemonade)    do_lemonade=1; shift ;;
            --no-rocm-install)  do_rocm_install=0; shift ;;
            --with-rocm-install) do_rocm_install=1; shift ;;
            --rocm-version)     rocm_version="$2"; shift 2 ;;
            --no-tdp-unlock)    do_tdp_unlock=0; shift ;;
            --with-tdp-unlock)  do_tdp_unlock=1; shift ;;
            --tdp-watts)        tdp_watts="$2"; shift 2 ;;
            --tdp-tctl)         tdp_tctl="$2"; shift 2 ;;
            --no-swap)          do_swap="off"; shift ;;
            --swap-tune)        do_swap="tune"; shift ;;
            --keep-swap)        do_swap=""; shift ;;
            --upgrade)          do_upgrade=1; shift ;;
            --enable-now)       do_enable_now=1; shift ;;
            --no-enable)        do_enable_now=0; shift ;;
            --force-service)    do_force_service=1; shift ;;
            --force)            do_force=1; do_force_service=1; shift ;;
            --service-host)     service_host="$2"; shift 2 ;;
            --service-port)     service_port="$2"; shift 2 ;;
            --mdns-hostname)    mdns_hostname="$2"; shift 2 ;;
            --alias)            service_alias="$2"; shift 2 ;;
            --ctx-size)         ctx_size="$2"; shift 2 ;;
            --ngl|--n-gpu-layers) ngl="$2"; shift 2 ;;
            --threads)          n_threads_default="$2"; shift 2 ;;
            --threads-batch)    n_threads_batch_default="$2"; shift 2 ;;
            --preset)           preset="$2"; shift 2 ;;
            --thinking)         thinking="$2"; shift 2 ;;
            --no-metrics)       enable_metrics=0; shift ;;
            --no-flash-attn)    enable_flash_attn=0; shift ;;
            --enable-verbose)   enable_verbose=1; shift ;;
            --no-verbose)       enable_verbose=0; shift ;;
            --cache-type-k)     cache_type_k="$2"; shift 2 ;;
            --cache-type-v)     cache_type_v="$2"; shift 2 ;;
            --no-mlock)         mlock=0; shift ;;
            --use-mmap)         no_mmap=0; shift ;;
            --no-mmap)          no_mmap=1; shift ;;
            # Numeric sampling/timeout values are written into easyai.ini
            # via heredoc and interpolated as-is, so anything other than a
            # number could sneak a newline or an extra "key = value" pair
            # into the INI (e.g. injecting "allow_bash = on" via a crafted
            # --temperature).  The parser only stores the raw value; its
            # shape is validated once parsing is complete.
            --repeat-penalty)   repeat_penalty="$2"; shift 2 ;;
            --presence-penalty) presence_penalty="$2"; shift 2 ;;
            --frequency-penalty) frequency_penalty="$2"; shift 2 ;;
            --temperature)      temperature="$2"; shift 2 ;;
            --top-p)            top_p="$2"; shift 2 ;;
            --top-k)            top_k="$2"; shift 2 ;;
            --min-p)            min_p="$2"; shift 2 ;;
            --max-tokens)       max_tokens="$2"; shift 2 ;;
            --http-timeout)     http_timeout="$2"; shift 2 ;;
            --rope-scaling)     rope_scaling="$2"; shift 2 ;;
            --rope-scale)       rope_freq_scale="$2"; shift 2 ;;
            --yarn-orig-ctx)    yarn_orig_ctx="$2"; shift 2 ;;
            --split-mode)       split_mode="$2"; shift 2 ;;
            --api-key)          api_key="$2"; shift 2 ;;
            --model)            model_src="$2"; shift 2 ;;
            --copy-model)       copy_model=1; shift ;;

            # ---- install_llama_server.sh drop-in compat ------------------
            # These were the proxy / SearXNG / MCP / spec-decoding /
            # webui-rebrand knobs of the old installer.  They're accepted
            # here so existing provisioning scripts keep working unchanged;
            # most are no-ops because the corresponding feature is now
            # built into easyai-server.
            --source-dir)       easyai_dir="$2"; shift 2 ;;   # alias for --easyai-dir
            --with-mcp|--no-mcp)
                warn "$1: ignored — easyai bundles web_search/web_fetch as built-in tools"
                shift ;;
            --webui-title)      webui_title="$2"; shift 2 ;;
            --webui-icon)       webui_icon="$2";  shift 2 ;;
            --thinking-budget)
                warn "--thinking-budget: not yet supported in easyai (use --thinking on/off + --max-tokens at runtime)"
                shift 2 ;;
            --draft-model|--draft-max|--draft-min)
                warn "$1: classic draft-model speculative decoding not wired up in easyai — flag ignored. For MTP-trained models, pass --mtp instead."
                shift 2 ;;
            --no-draft)
                warn "--no-draft: ignored (speculative decoding is off by default in easyai; pass --mtp to enable MTP)"
                shift ;;
            --mtp)
                # Bake `--spec-type draft-mtp --spec-draft-n-max $mtp_n_max`
                # into ExecStart. Only meaningful when the served model was
                # trained with MTP heads (DeepSeek V3, MimoVL, etc.); other
                # models will refuse to load or run plain autoregressive.
                mtp=1
                shift ;;
            --mtp-n-max)
                mtp_n_max="$2"
                if [[ ! "$mtp_n_max" =~ ^[0-9]+$ ]] || (( mtp_n_max < 1 )); then
                    die "--mtp-n-max: expected positive integer, got: $(printf '%q' "$mtp_n_max")"
                fi
                shift 2 ;;
            --list-tags)
                # Mirror the original installer's behaviour: list recent
                # tags of the easyai repo instead of llama.cpp's.  Fetch
                # chatter goes to stderr so stdout carries only the tags.
                if [[ -d "$easyai_dir/.git" ]]; then
                    git -C "$easyai_dir" fetch --tags --prune --force >&2 || true
                    git -C "$easyai_dir" tag -l 2>/dev/null | tail -20 | sed 's/^/  /'
                else
                    echo "  (no local clone yet — pass --easyai-dir or run a normal install first)"
                fi
                exit 0 ;;
            # -------------------------------------------------------------

            -h|--help)          sed -n '2,110p' "$0"; exit 0 ;;
            *)
                echo "unknown arg: $1" >&2
                echo "run with --help for usage" >&2
                exit 2 ;;
        esac
    done
}

parse_args "$@"
# The parser used to run as a top-level loop that consumed every argument;
# clear the positional parameters so "$@" / $# look the same afterwards.
set --

# require_numeric FLAG VALUE
#   Guard for values that are expanded verbatim into the easyai.ini
#   heredoc.  Accepts an integer or a decimal with digits on both sides of
#   the dot, optionally prefixed by a minus sign; everything else aborts
#   through die.  This blocks payloads such as
#   --temperature $'0.3\nallow_bash = on', whose embedded newline and "="
#   would otherwise add an extra key to the INI.
require_numeric() {
    local flag="$1" candidate="$2"
    local numeric_re='^-?[0-9]+(\.[0-9]+)?$'

    # Fast path: well-formed number, nothing to report.
    [[ "$candidate" =~ $numeric_re ]] && return 0

    # %q renders control characters visibly so the offending input can be
    # read safely in the terminal.
    die "$flag: must be numeric, got: $(printf '%q' "$candidate")"
}

# require_no_injection FLAG VALUE
#   Companion to require_numeric for free-form knobs (host, alias,
#   webui_title, cache_type_*) that are also expanded into the easyai.ini
#   heredoc.  Legitimate values are letters / digits / dashes / dots /
#   spaces; a line break, '=', '[' or ']' could open a new INI line or
#   section, so any of them aborts through die.
#   Same threat model as require_numeric (§20.4 and 21.7).
require_no_injection() {
    local flag="$1" text="$2"

    # Line breaks (LF or CR) would start a fresh INI line.
    case "$text" in
        *$'\n'*|*$'\r'*)
            die "$flag: must not contain newlines"
            ;;
    esac

    # '=' forges a key/value pair, '[' / ']' forge a section header.
    case "$text" in
        *=*|*\[*|*\]*)
            die "$flag: must not contain '=', '[' or ']' (would inject into easyai.ini)"
            ;;
    esac
}

# ---------- pre-flight ------------------------------------------------------
# Cheap checks that must pass before anything touches the system: caller
# identity, shape of every user-supplied value that is later expanded into
# generated files or shell arithmetic, and the target platform.
[[ $EUID -eq 0 ]] && die "do not run as root — script calls sudo as needed"

# Validate numeric inputs before they flow into the INI heredoc.  The
# arg-parser scoped each value to a single argv slot already; this layer
# constrains the content shape.
require_numeric "--temperature"     "$temperature"
require_numeric "--top-p"           "$top_p"
require_numeric "--top-k"           "$top_k"
require_numeric "--min-p"           "$min_p"
require_numeric "--repeat-penalty"  "$repeat_penalty"
require_numeric "--presence-penalty" "$presence_penalty"
require_numeric "--frequency-penalty" "$frequency_penalty"
require_numeric "--max-tokens"      "$max_tokens"
require_numeric "--http-timeout"    "$http_timeout"
require_numeric "--ctx-size"        "$ctx_size"
require_numeric "--service-port"    "$service_port"
require_numeric "--threads"         "$n_threads_default"
require_numeric "--threads-batch"   "$n_threads_batch_default"
require_numeric "--ngl"             "$ngl"
require_numeric "--rope-scale"      "$rope_freq_scale"
require_numeric "--yarn-orig-ctx"   "$yarn_orig_ctx"

# --gtt is multiplied inside $(( ... )) when the GTT page count is computed.
# Shell arithmetic evaluates its operand as an expression (variable names,
# array subscripts, nested expansions), so only a plain non-negative
# integer is safe there; floats would be a syntax error anyway.
if [[ ! "$gtt_gb" =~ ^[0-9]+$ ]]; then
    die "--gtt: expected non-negative integer (GiB), got: $(printf '%q' "$gtt_gb")"
fi

# --rocm-version becomes part of the repo.radeon.com URL written (as root)
# into /etc/apt/sources.list.d/rocm.list.  Restrict it to a version-like
# token so whitespace or line breaks cannot forge extra APT source fields
# or lines.
if [[ ! "$rocm_version" =~ ^[A-Za-z0-9][A-Za-z0-9._-]*$ ]]; then
    die "--rocm-version: expected a version token (letters/digits/dots/dashes), got: $(printf '%q' "$rocm_version")"
fi

# Non-numeric knobs also flow into the heredoc; reject newline / '=' /
# '[' / ']' shapes so they can't carve a new section or override key.
require_no_injection "--service-host" "$service_host"
require_no_injection "--alias"        "$service_alias"
require_no_injection "--webui-title"  "$webui_title"
require_no_injection "--cache-type-k" "$cache_type_k"
require_no_injection "--cache-type-v" "$cache_type_v"
require_no_injection "--rope-scaling" "$rope_scaling"
require_no_injection "--split-mode"   "$split_mode"

# Hostname must be a valid RFC 1123 label: letters / digits / hyphens,
# no leading or trailing hyphen, max 63 chars. hostnamectl would reject
# malformed names anyway; catching it here gives a friendlier error.
if [[ ! "$mdns_hostname" =~ ^[A-Za-z0-9]([A-Za-z0-9-]{0,61}[A-Za-z0-9])?$ ]]; then
    die "--mdns-hostname: must be a valid hostname label (letters/digits/hyphens, ≤63 chars), got: $(printf '%q' "$mdns_hostname")"
fi

# Platform gate: Linux with apt only.
if [[ "$(uname -s)" != "Linux" ]]; then
    die "this installer targets Linux. On macOS, build manually — see README.md (Build for your hardware)."
fi

command -v apt-get >/dev/null \
    || die "this script targets Debian/Ubuntu (apt). Adapt package names for other distros."

# ---------- backend auto-detect --------------------------------------------
# detect_backend
#   Prints the GPU backend to build for on stdout.
#   Globals: backend (read) — an explicit --backend value is echoed back
#            untouched; only "auto" triggers probing.
#   Probe order (first hit wins):
#     1. nvidia-smi present and working          -> cuda
#     2. rocminfo on PATH                        -> hip
#     3. vulkaninfo reports at least one device  -> vulkan
#     4. lspci shows an AMD/Radeon display device -> vulkan (SDK gets
#        installed later)
#     5. otherwise                               -> cpu
detect_backend() {
    # honour explicit --backend
    if [[ "$backend" != "auto" ]]; then echo "$backend"; return; fi
    # NVIDIA?
    if command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi >/dev/null 2>&1; then
        echo cuda; return
    fi
    # AMD ROCm?
    if command -v rocminfo >/dev/null 2>&1; then
        echo hip; return
    fi
    # any GPU visible to Vulkan?
    if command -v vulkaninfo >/dev/null 2>&1 \
            && vulkaninfo --summary 2>/dev/null | grep -qiE 'deviceName'; then
        echo vulkan; return
    fi
    # AMD GPU PCI present but Vulkan not yet installed → still pick vulkan, we'll install the SDK.
    # Both conditions must hold on the SAME lspci line: matching "vga|display"
    # and "amd|radeon" independently would also fire for, say, an NVIDIA or
    # Intel GPU sitting next to an AMD host bridge / USB controller.
    # awk consumes the whole stream, so lspci never sees a closed pipe.
    if command -v lspci >/dev/null 2>&1 \
            && lspci 2>/dev/null | awk '
                { line = tolower($0) }
                line ~ /vga|display/ && line ~ /amd|radeon/ { hit = 1 }
                END { exit !hit }'; then
        echo vulkan; return
    fi
    echo cpu
}
# Resolve the backend once, then refuse anything outside the known set so a
# typo in --backend fails here instead of deep inside the build.
backend_resolved="$(detect_backend)"

if [[ ! "$backend_resolved" =~ ^(auto|vulkan|cuda|hip|cpu)$ ]]; then
    die "unknown backend: $backend_resolved"
fi

# ---------- print effective config -----------------------------------------
# Dump every resolved knob before any work starts so the operator can spot
# a wrong default or a mistyped flag in the log.
# gtt_gb is in GiB: 262144 pages x 4 KiB per page = 1 GiB.
gtt_pages=$(( gtt_gb * 262144 ))     # GTT page count, 4 KiB per page

log "effective config:"
printf '    src_root         = %s\n' "$src_root"
printf '    easyai_dir       = %s\n' "$easyai_dir"
printf '    llama_dir        = %s\n' "$llama_dir"
printf '    install_prefix   = %s\n' "$install_prefix"
printf '    backend          = %s\n' "$backend_resolved"
printf '    service_host     = %s\n' "$service_host"
printf '    service_port     = %s\n' "$service_port"
printf '    service_alias    = %s\n' "$service_alias"
printf '    mdns_hostname    = %s   (advertises as %s.local; skipped under --no-avahi)\n' \
    "$mdns_hostname" "$mdns_hostname"
printf '    ctx_size         = %s\n' "$ctx_size"
printf '    ngl              = %s   (-1=auto, 0=CPU only, 99=all GPU layers)\n' "$ngl"
printf '    threads / batch  = %s / %s\n' "$n_threads_default" "$n_threads_batch_default"
printf '    preset           = %s  thinking=%s\n' "$preset" "$thinking"
printf '    KV cache         = K=%s  V=%s  flash_attn=%s\n' "$cache_type_k" "$cache_type_v" "$enable_flash_attn"
printf '    split_mode       = %s\n' "$split_mode"
printf '    rope             = scaling=%s  scale=%s  yarn_orig_ctx=%s\n' \
                                 "$rope_scaling" "$rope_freq_scale" "$yarn_orig_ctx"
printf '    memory           = mlock=%s  no_mmap=%s\n' "$mlock" "$no_mmap"
printf '    http_timeout     = %ss\n' "$http_timeout"
printf '    sampling         = temp=%s top_p=%s top_k=%s min_p=%s\n' \
                                 "$temperature" "$top_p" "$top_k" "$min_p"
printf '                       repeat_penalty=%s  presence_penalty=%s  frequency_penalty=%s\n' \
                                 "$repeat_penalty" "$presence_penalty" "$frequency_penalty"
printf '                       max_tokens=%s\n' "$max_tokens"
printf '    metrics          = %s\n' "$enable_metrics"
printf '    verbose          = %s\n' "$enable_verbose"
# The next four 1/0 feature switches are rendered as on/off.  "$prefix" in
# the first format string is literal text (single quotes), not a variable.
printf '    llama_tools      = %s   (llama-cli/server/gguf-split/quantize/bench/... in $prefix/bin)\n' \
    "$([[ $do_llama_tools -eq 1 ]] && echo on || echo off)"
printf '    lemonade         = %s   (AMD Lemonade Server for NPU; systemd unit DISABLED, manual start)\n' \
    "$([[ $do_lemonade -eq 1 ]] && echo on || echo off)"
printf '    rocm_install     = %s   (auto-install minimal ROCm SDK for --backend hip on AMD CPU; version=%s)\n' \
    "$([[ $do_rocm_install -eq 1 ]] && echo on || echo off)" "$rocm_version"
printf '    tdp_unlock       = %s   (Ryzen TDP unlock via ryzenadj+systemd timer; cap=%sW tctl=%s°C)\n' \
    "$([[ $do_tdp_unlock -eq 1 ]] && echo on || echo off)" "$tdp_watts" "$tdp_tctl"
printf '    webui_title      = %s\n' "$webui_title"
printf '    webui_icon       = %s\n' "${webui_icon:-<default — no icon>}"
# Never echo the key itself — only whether one is configured.
printf '    api_key          = %s\n' "$([[ -n "$api_key" ]] && echo "<set>" || echo "<none — server is open>")"
printf '    model_src        = %s\n' "${model_src:-<none — pass --model PATH>}"
# Raw 1/0 step switches, grouped over three lines.
printf '    flags            = install:%s build:%s groups:%s limits:%s kernel:%s\n' \
    "$do_install" "$do_build" "$do_groups" "$do_limits" "$do_kernel"
printf '                       service:%s model:%s avahi:%s presets:%s\n' \
    "$do_service" "$do_model" "$do_avahi" "$do_presets"
printf '                       enable_now:%s force_service:%s upgrade:%s\n' \
    "$do_enable_now" "$do_force_service" "$do_upgrade"
echo

# Model step requested but no path supplied: warn only, do not abort.
if [[ $do_model -eq 1 && -z "$model_src" ]]; then
    warn "no --model PATH given. Pass it later or rerun with --no-model to skip."
fi

# ---------- detected hardware ----------------------------------------------
# Informational summary only — nothing below feeds later decisions.
log "detected hardware:"
# CPU: first "Model name" line from lscpu, leading spaces trimmed.
printf '    CPU : %s\n' "$(lscpu | awk -F: '/Model name/ {gsub(/^ +/,"",$2); print $2; exit}')"
# RAM: the total column of the "Mem:" row from `free -h`.
printf '    RAM : %s\n' "$(free -h | awk '/^Mem:/ {print $2}')"
# GPU: first VGA/display device from lspci, with the PCI address and class
# prefix cut off.  Skipped entirely when lspci is not installed.
if command -v lspci >/dev/null; then
    printf '    GPU : %s\n' \
        "$(lspci | grep -iE 'vga|display' | head -1 | cut -d: -f3- | sed 's/^ //')"
fi

# ---------- install dependencies -------------------------------------------
# Installs the common toolchain via apt, then the SDK matching the resolved
# backend, then (optionally) avahi.  Skipped entirely under --no-install.
if [[ $do_install -eq 1 ]]; then
    log "installing common build deps + libcurl + system tools"
    sudo apt-get update
    sudo apt-get install -y --no-install-recommends \
        build-essential cmake ninja-build git ccache pkg-config curl ca-certificates \
        libcurl4-openssl-dev libomp-dev libcap2-bin jq \
        systemd-coredump patchelf

    case "$backend_resolved" in
        vulkan)
            log "installing Vulkan SDK + Mesa drivers"
            sudo apt-get install -y --no-install-recommends \
                mesa-vulkan-drivers vulkan-tools libvulkan-dev \
                glslc glslang-tools spirv-tools libshaderc-dev
            ;;
        cuda)
            log "CUDA backend selected — assuming nvidia-cuda-toolkit is installed."
            warn "If 'nvcc --version' fails, install the CUDA Toolkit from https://developer.nvidia.com/cuda-downloads"
            ;;
        hip)
            # Detect CPU vendor — auto-install is gated on AMD CPU because
            # on Intel boxes `--backend hip` usually means something custom
            # (eg. a slot-in AMD dGPU) and we shouldn't drop the
            # repo.radeon.com APT source unless we're clearly on an AMD
            # shop. Operator can pass --with-rocm-install to force.
            cpu_vendor="$(awk -F: '/vendor_id/{gsub(/^[[:space:]]+|[[:space:]]+$/,"",$2); print $2; exit}' /proc/cpuinfo 2>/dev/null)"

            # Distro detection — repo.radeon.com only publishes builds
            # for Ubuntu LTS releases (jammy / noble at time of writing).
            # os-release is sourced inside $(...) so its variables do not
            # leak into this script.
            distro_id=""
            distro_codename=""
            if [[ -r /etc/os-release ]]; then
                # shellcheck disable=SC1091
                distro_id="$(. /etc/os-release && echo "${ID:-}")"
                # shellcheck disable=SC1091
                distro_codename="$(. /etc/os-release && echo "${UBUNTU_CODENAME:-${VERSION_CODENAME:-}}")"
            fi

            if [[ $do_rocm_install -ne 1 ]]; then
                log "ROCm/HIP backend selected — auto-install disabled (--no-rocm-install)."
                warn "Install the ROCm SDK manually if rocminfo / hipcc are missing."
            elif [[ "$cpu_vendor" != "AuthenticAMD" ]]; then
                log "ROCm/HIP backend selected — CPU vendor is '${cpu_vendor:-unknown}', not AMD; skipping auto-install."
                warn "Install the ROCm SDK manually, or pass --with-rocm-install to force on a non-AMD CPU."
            elif command -v hipcc >/dev/null 2>&1 && command -v rocminfo >/dev/null 2>&1; then
                log "ROCm/HIP backend selected — hipcc + rocminfo already on PATH; skipping auto-install."
            elif [[ "$distro_id" != "ubuntu" ]]; then
                warn "ROCm auto-install: distro '${distro_id:-unknown}' is not Ubuntu; skipping."
                warn "  install ROCm manually per https://rocm.docs.amd.com/projects/install-on-linux/en/latest/"
                warn "  or pass --no-rocm-install to silence this notice."
            else
                # On non-LTS Ubuntu (25.10 / questing etc.) AMD doesn't
                # publish a matching .deb — pin the APT source to the
                # latest LTS (noble). The ROCm runtime libs are decoupled
                # enough from the Ubuntu userspace that the LTS .deb runs
                # on non-LTS once apt resolves libssl3 / libssl3t64
                # transition packages. Tested path on the AI box.
                case "$distro_codename" in
                    jammy|noble) ;;
                    *)
                        log "ROCm: Ubuntu '${distro_codename:-unknown}' is non-LTS; pinning APT source to 'noble'"
                        distro_codename="noble"
                        ;;
                esac

                log "ROCm/HIP auto-install — minimal SDK from repo.radeon.com/rocm/apt/${rocm_version} (${distro_codename})"

                # gnupg may not be on minimal Ubuntu images (gpg dearmors the key below).
                if ! command -v gpg >/dev/null 2>&1; then
                    sudo apt-get install -y gnupg
                fi

                # GPG key + sources.list, both idempotent.
                sudo mkdir -p --mode=0755 /etc/apt/keyrings
                # Re-fetch when the keyring is missing, unreadable OR empty:
                # an empty file is what an interrupted earlier download
                # leaves behind and would otherwise be trusted forever.
                if [[ ! -r /etc/apt/keyrings/rocm.gpg || ! -s /etc/apt/keyrings/rocm.gpg ]]; then
                    log "  fetching ROCm GPG key"
                    # curl is installed by the common-deps step above (wget
                    # is not).  Dearmor into a temp file first and only
                    # install it once it is known to be non-empty, so a
                    # failed download never lands in /etc/apt/keyrings.
                    rocm_key_tmp="$(mktemp)"
                    if curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key \
                            | gpg --dearmor > "$rocm_key_tmp" \
                            && [[ -s "$rocm_key_tmp" ]]; then
                        sudo install -m 0644 "$rocm_key_tmp" /etc/apt/keyrings/rocm.gpg
                        rm -f -- "$rocm_key_tmp"
                    else
                        rm -f -- "$rocm_key_tmp"
                        die "failed to download or dearmor the ROCm GPG key from repo.radeon.com"
                    fi
                fi
                if [[ ! -r /etc/apt/sources.list.d/rocm.list ]]; then
                    log "  adding APT source: rocm.list"
                    echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${rocm_version} ${distro_codename} main" \
                        | sudo tee /etc/apt/sources.list.d/rocm.list > /dev/null
                    sudo apt-get update
                else
                    log "  /etc/apt/sources.list.d/rocm.list already present, skipping add"
                fi

                # The MINIMAL HIP set for llama.cpp's GGML_HIP backend
                # (~5 GB). Avoids the full `rocm` meta-package (~25 GB
                # with MIOpen / migraphx / rocFFT / hipSPARSE etc. that
                # llama.cpp's HIP path does not link against).
                log "  apt-get install minimal HIP set (~5 GB)"
                sudo apt-get install -y \
                    rocm-hip-runtime rocm-hip-sdk rocblas-dev hipblas-dev \
                    hip-dev rocm-device-libs

                # /etc/profile.d snippet so every login shell + the cmake
                # invocation below find hipcc and link against /opt/rocm/lib.
                if [[ ! -r /etc/profile.d/rocm.sh ]]; then
                    log "  writing /etc/profile.d/rocm.sh"
                    sudo tee /etc/profile.d/rocm.sh >/dev/null <<'PROF'
# Added by easyai installer — /opt/rocm/bin on PATH, /opt/rocm/lib on linker.
export PATH="/opt/rocm/bin${PATH:+:$PATH}"
export LD_LIBRARY_PATH="/opt/rocm/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
PROF
                fi
                # Source for the rest of this installer run (cmake call
                # below needs hipcc on PATH; new login shells get it via
                # /etc/profile.d).
                # shellcheck disable=SC1091
                [[ -r /etc/profile.d/rocm.sh ]] && source /etc/profile.d/rocm.sh

                # Sanity: confirm hipcc + rocminfo are now resolvable, and
                # that ROCm sees an actual gfx device. If rocminfo finds
                # nothing the operator likely needs to add their user to
                # render+video and relog (handled elsewhere by do_groups).
                if command -v hipcc >/dev/null 2>&1 && command -v rocminfo >/dev/null 2>&1; then
                    log "  hipcc:    $(hipcc --version 2>/dev/null | head -1)"
                    gfx_found="$(rocminfo 2>/dev/null | awk '/Name:[[:space:]]+gfx[0-9]+/{print $2; exit}')"
                    if [[ -n "$gfx_found" ]]; then
                        log "  AMD GPU visible to ROCm: $gfx_found"
                    else
                        warn "  ROCm installed but rocminfo sees no gfx device — confirm membership in render+video and relog."
                    fi
                else
                    warn "  ROCm install: hipcc or rocminfo still missing after apt — check apt output"
                fi
            fi
            ;;
        cpu)
            log "CPU-only backend — no GPU SDK to install."
            ;;
    esac

    if [[ $do_avahi -eq 1 ]]; then
        sudo apt-get install -y --no-install-recommends avahi-daemon avahi-utils
    fi
fi

# Vulkan-only smoke test: runs when the resolved backend is vulkan and
# vulkaninfo is available.  Lists the device/driver names the loader
# reports, or warns when it reports none.  Never aborts the install.
if [[ "$backend_resolved" == "vulkan" ]] && command -v vulkaninfo >/dev/null; then
    log "Vulkan device check"
    if vulkaninfo --summary 2>/dev/null | grep -qiE 'deviceName'; then
        # At least one device: show its name and driver, indented.
        vulkaninfo --summary 2>/dev/null \
            | grep -iE 'deviceName|driverName' \
            | sed 's/^/    /'
    else
        warn "vulkaninfo returned no device — Mesa/driver state may be wrong"
    fi
fi

# ---------- clone / fetch sources ------------------------------------------
# fetch_repo DIR REPO REF
#   Makes sure DIR holds a checkout of REPO and, when REF is non-empty,
#   that REF is checked out.
#   Arguments:
#     DIR   working-tree path; cloned (partial clone, blobs on demand)
#           when DIR/.git does not exist yet
#     REPO  clone URL, only used for the initial clone
#     REF   branch / tag / SHA to check out; empty keeps the current HEAD
#   Globals: do_upgrade (read) — when 1, an existing clone is fetched and
#            fast-forwarded.
#   Upgrade behaviour on an existing clone:
#     - fetch always runs;
#     - REF empty: the current branch is fast-forwarded, a failure only
#       warns (local commits are never discarded silently);
#     - REF is a branch: after checkout the local branch is fast-forwarded
#       to its upstream.  A bare checkout would leave it at the previously
#       fetched commit, i.e. `--upgrade --ref <branch>` would not upgrade;
#     - REF is a tag or SHA: checkout detaches HEAD at it, no pull.
fetch_repo() {
    local dir="$1" repo="$2" ref="$3"
    local existing=0

    if [[ -d "$dir/.git" ]]; then
        existing=1
        if [[ $do_upgrade -eq 1 ]]; then
            log "git fetch + pull --ff-only in $dir"
            git -C "$dir" fetch --tags --prune --force
        fi
    else
        log "cloning $repo → $dir"
        mkdir -p "$(dirname "$dir")"
        git clone --filter=blob:none "$repo" "$dir"
    fi

    if [[ -n "$ref" ]]; then
        log "checking out ref '$ref' in $dir"
        git -C "$dir" checkout "$ref"
    fi

    if [[ $existing -eq 1 && $do_upgrade -eq 1 ]]; then
        # With no REF we always attempt the pull, so a detached or diverged
        # tree produces the warning below.  With a REF we pull only when
        # HEAD now tracks an upstream branch; '@{upstream}' does not resolve
        # on a detached HEAD (tag / SHA) or on a branch with no upstream.
        # --ff-only refuses to merge / rebase silently — if the operator has
        # local commits we want them to know, not have us wipe them.
        if [[ -z "$ref" ]] \
                || git -C "$dir" rev-parse --abbrev-ref --symbolic-full-name '@{upstream}' >/dev/null 2>&1; then
            if ! git -C "$dir" pull --ff-only; then
                warn "git pull --ff-only failed in $dir — local changes block the upgrade"
                warn "resolve manually (git status / git stash / git reset) or pass --ref <sha>"
            fi
        fi
    fi

    # Show the resulting HEAD so logs make the upgrade visible.
    local head_short
    head_short=$(git -C "$dir" rev-parse --short HEAD 2>/dev/null || echo "?")
    log "  $dir at $head_short"
}

# Fetch both trees, llama.cpp first: easyai's CMakeLists looks for it at
# ../llama.cpp relative to the easyai checkout.
fetch_repo "$llama_dir"  "$llama_repo"  "$llama_ref"
fetch_repo "$easyai_dir" "$easyai_repo" "$easyai_ref"

# When llama.cpp was cloned somewhere other than the sibling path, link it
# into place.  Existing entries are never replaced: a symlink is accepted
# silently, anything else only draws a warning.
expected_llama="$(dirname "$easyai_dir")/llama.cpp"
if [[ "$llama_dir" != "$expected_llama" ]]; then
    if [[ -e "$expected_llama" ]]; then
        [[ -L "$expected_llama" ]] \
            || warn "$expected_llama exists and is not a symlink; leaving it alone"
    else
        log "symlinking $llama_dir → $expected_llama"
        ln -s "$llama_dir" "$expected_llama"
    fi
fi

# ---------- build easyai ----------------------------------------------------
# Configure and compile easyai in $easyai_dir/build (Release, examples on).
# The backend picks the GGML_* switch; --with-llama-tools adds the llama.cpp
# CLI tools.  Skipped entirely under --no-build.
if [[ $do_build -eq 1 ]]; then
    log "configuring easyai build (backend=$backend_resolved)"

    cmake_flags=( -DCMAKE_BUILD_TYPE=Release -DEASYAI_BUILD_EXAMPLES=ON )
    if [[ $do_llama_tools -eq 1 ]]; then
        cmake_flags+=( -DEASYAI_BUILD_LLAMA_TOOLS=ON )
    fi

    # Backend → cmake switch.  "cpu" (and anything else) adds no GPU flag.
    if [[ "$backend_resolved" == "vulkan" ]]; then
        cmake_flags+=( -DGGML_VULKAN=ON )
    elif [[ "$backend_resolved" == "cuda" ]]; then
        cmake_flags+=( -DGGML_CUDA=ON )
    elif [[ "$backend_resolved" == "hip" ]]; then
        cmake_flags+=( -DGGML_HIP=ON )
        # Narrow the HIP build to the GPU that is actually installed:
        # take the first gfxNNNN name rocminfo reports and pass it as
        # AMDGPU_TARGETS.  The broad default set inflates build time and
        # may emit code that doesn't match this box (e.g. gfx1100 on a
        # gfx1151 Strix Point iGPU).  Without rocminfo on PATH (e.g.
        # --no-rocm-install and /etc/profile.d/rocm.sh not sourced in
        # this shell) AMDGPU_TARGETS stays unset and llama.cpp's HIP
        # cmake picks its default.
        if command -v rocminfo >/dev/null 2>&1; then
            gfx_target="$(rocminfo 2>/dev/null | awk '/Name:[[:space:]]+gfx[0-9]+/{print $2; exit}')"
            if [[ -n "$gfx_target" ]]; then
                log "  AMDGPU_TARGETS=$gfx_target (auto-detected from rocminfo)"
                cmake_flags+=( -DAMDGPU_TARGETS="$gfx_target" )
            fi
        fi
    fi

    # Run cmake from inside the source tree; popd restores the caller's cwd.
    pushd "$easyai_dir" >/dev/null
    cmake -S . -B build "${cmake_flags[@]}"
    log "building (jobs=$jobs)"
    cmake --build build -j "$jobs"
    popd >/dev/null
fi

# ---------- post-build sanity: which GPU backend was actually compiled? ----
# We honour what the build produced over what the user asked for, so a CPU-
# only binary doesn't spam GPU-related errors at runtime via --ngl.
#
# Scan the build tree for libggml-<name>.so* and collect every <name> other
# than "base" and "cpu".  The result, detected_backends, is a
# comma-separated, de-duplicated list (e.g. "cuda, vulkan") and is empty
# for a CPU-only build.
detected_backends=""
while IFS= read -r so; do
    # libggml-vulkan.so.1 -> vulkan.  A file name the pattern does not match
    # passes through sed unchanged and is then recorded as-is.
    name=$(basename "$so" | sed -E 's/^libggml-([a-z0-9]+)\.so.*/\1/')
    case "$name" in
        base|cpu) ;;
        *) detected_backends="$detected_backends $name" ;;
    esac
# Process substitution (not a pipe) keeps the loop in the current shell so
# the detected_backends assignments survive it.
done < <(find "$easyai_dir/build" -maxdepth 8 -name 'libggml-*.so*' 2>/dev/null | sort -u)
# One name per line -> unique -> joined with ", " (trailing comma removed).
detected_backends=$(echo "$detected_backends" | xargs -n1 2>/dev/null | sort -u | tr '\n' ',' | sed 's/,$//; s/,/, /g')

if [[ -z "$detected_backends" ]]; then
    if [[ "$ngl" -ne 0 ]]; then
        warn "build is CPU-only (no libggml-{vulkan,cuda,hip,metal}.so found)"
        warn "forcing --ngl 0 in the systemd unit"