Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions core/cli/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ import (

"github.com/rs/zerolog/log"

gguf "github.com/gpustack/gguf-parser-go"
cliContext "github.com/mudler/LocalAI/core/cli/context"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/gallery"
"github.com/mudler/LocalAI/pkg/downloader"
gguf "github.com/thxcode/gguf-parser-go"
)

type UtilCMD struct {
Expand Down Expand Up @@ -51,7 +51,7 @@ func (u *GGUFInfoCMD) Run(ctx *cliContext.Context) error {
log.Info().
Any("eosTokenID", f.Tokenizer().EOSTokenID).
Any("bosTokenID", f.Tokenizer().BOSTokenID).
Any("modelName", f.Model().Name).
Any("modelName", f.Metadata().Name).
Any("architecture", f.Architecture().Architecture).Msgf("GGUF file loaded: %s", u.Args[0])

log.Info().Any("tokenizer", fmt.Sprintf("%+v", f.Tokenizer())).Msg("Tokenizer")
Expand Down
53 changes: 48 additions & 5 deletions core/config/gguf.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@ package config
import (
"strings"

"github.com/mudler/LocalAI/pkg/xsysinfo"
"github.com/rs/zerolog/log"

gguf "github.com/thxcode/gguf-parser-go"
gguf "github.com/gpustack/gguf-parser-go"
)

type familyType uint8
Expand All @@ -23,6 +24,7 @@ const (

const (
defaultContextSize = 1024
defaultNGPULayers = 99999999
)

type settingsConfig struct {
Expand Down Expand Up @@ -147,7 +149,7 @@ var knownTemplates = map[string]familyType{
func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {

if defaultCtx == 0 && cfg.ContextSize == nil {
ctxSize := f.EstimateLLaMACppUsage().ContextSize
ctxSize := f.EstimateLLaMACppRun().ContextSize
if ctxSize > 0 {
cSize := int(ctxSize)
cfg.ContextSize = &cSize
Expand All @@ -157,6 +159,46 @@ func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
}
}

// GPU options
if cfg.Options == nil {
if xsysinfo.HasGPU("nvidia") || xsysinfo.HasGPU("amd") {
cfg.Options = []string{"gpu"}
}
}

// vram estimation
vram, err := xsysinfo.TotalAvailableVRAM()
if err != nil {
log.Error().Msgf("guessDefaultsFromFile(TotalAvailableVRAM): %s", err)
} else {
estimate, err := xsysinfo.EstimateGGUFVRAMUsage(f, vram)
if err != nil {
log.Error().Msgf("guessDefaultsFromFile(EstimateGGUFVRAMUsage): %s", err)
} else {
if estimate.IsFullOffload {
log.Warn().Msgf("guessDefaultsFromFile: %s", "full offload is recommended")
}

if estimate.EstimatedVRAM > vram {
log.Warn().Msgf("guessDefaultsFromFile: %s", "estimated VRAM usage is greater than available VRAM")
}

if cfg.NGPULayers == nil && estimate.EstimatedLayers > 0 {
log.Debug().Msgf("guessDefaultsFromFile: %d layers estimated", estimate.EstimatedLayers)
cfg.NGPULayers = &estimate.EstimatedLayers
}
}
}

if cfg.NGPULayers == nil {
// we assume we want to offload all layers
defaultHigh := defaultNGPULayers
cfg.NGPULayers = &defaultHigh
}

log.Debug().Any("NGPULayers", cfg.NGPULayers).Msgf("guessDefaultsFromFile: %s", "NGPULayers set")

// template estimations
if cfg.HasTemplate() {
// nothing to guess here
log.Debug().Any("name", cfg.Name).Msgf("guessDefaultsFromFile: %s", "template already set")
Expand All @@ -166,12 +208,12 @@ func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
log.Debug().
Any("eosTokenID", f.Tokenizer().EOSTokenID).
Any("bosTokenID", f.Tokenizer().BOSTokenID).
Any("modelName", f.Model().Name).
Any("modelName", f.Metadata().Name).
Any("architecture", f.Architecture().Architecture).Msgf("Model file loaded: %s", cfg.ModelFileName())

// guess the name
if cfg.Name == "" {
cfg.Name = f.Model().Name
cfg.Name = f.Metadata().Name
}

family := identifyFamily(f)
Expand Down Expand Up @@ -207,6 +249,7 @@ func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
cfg.TemplateConfig.JinjaTemplate = true
cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString()
}

}

func identifyFamily(f *gguf.GGUFFile) familyType {
Expand All @@ -231,7 +274,7 @@ func identifyFamily(f *gguf.GGUFFile) familyType {
commandR := arch == "command-r" && eosTokenID == 255001
qwen2 := arch == "qwen2"
phi3 := arch == "phi-3"
gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Model().Name), "gemma")
gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Metadata().Name), "gemma")
deepseek2 := arch == "deepseek2"

switch {
Expand Down
9 changes: 1 addition & 8 deletions core/config/guesser.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@ import (
"os"
"path/filepath"

"github.com/mudler/LocalAI/pkg/xsysinfo"
gguf "github.com/gpustack/gguf-parser-go"
"github.com/rs/zerolog/log"
gguf "github.com/thxcode/gguf-parser-go"
)

func guessDefaultsFromFile(cfg *BackendConfig, modelPath string, defaultCtx int) {
Expand Down Expand Up @@ -36,10 +35,4 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string, defaultCtx int)
}
cfg.ContextSize = &defaultCtx
}

if cfg.Options == nil {
if xsysinfo.HasGPU("nvidia") || xsysinfo.HasGPU("amd") {
cfg.Options = []string{"gpu"}
}
}
}
6 changes: 4 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ require (
github.com/golang/protobuf v1.5.4
github.com/google/go-containerregistry v0.19.2
github.com/google/uuid v1.6.0
github.com/gpustack/gguf-parser-go v0.17.0
github.com/grpc-ecosystem/grpc-gateway v1.5.0
github.com/hpcloud/tail v1.0.0
github.com/ipfs/go-log v1.0.5
Expand Down Expand Up @@ -110,6 +111,7 @@ require (
github.com/pion/turn/v2 v2.1.6 // indirect
github.com/pion/turn/v4 v4.0.0 // indirect
github.com/pion/webrtc/v4 v4.0.9 // indirect
github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 // indirect
github.com/savsgio/gotils v0.0.0-20230208104028-c358bd845dee // indirect
github.com/shirou/gopsutil/v4 v4.24.7 // indirect
github.com/wlynxg/anet v0.0.5 // indirect
Expand Down Expand Up @@ -188,7 +190,7 @@ require (
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/hashicorp/golang-lru v1.0.2 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
github.com/henvic/httpretty v0.1.3 // indirect
github.com/henvic/httpretty v0.1.4 // indirect
github.com/huandu/xstrings v1.5.0 // indirect
github.com/huin/goupnp v1.3.0 // indirect
github.com/ipfs/boxo v0.27.4 // indirect
Expand Down Expand Up @@ -278,7 +280,7 @@ require (
github.com/shoenig/go-m1cpu v0.1.6 // indirect
github.com/shopspring/decimal v1.4.0 // indirect
github.com/sirupsen/logrus v1.9.3 // indirect
github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b // indirect
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect
github.com/songgao/packets v0.0.0-20160404182456-549a10cd4091 // indirect
github.com/spaolacci/murmur3 v1.1.0 // indirect
github.com/spf13/cast v1.7.0 // indirect
Expand Down
12 changes: 8 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,8 @@ github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/gpustack/gguf-parser-go v0.17.0 h1:DkSziWLsiQM0pqqkr/zMcaBn94KY7iQTi4zmaHixDus=
github.com/gpustack/gguf-parser-go v0.17.0/go.mod h1:GvHh1Kvvq5ojCOsJ5UpwiJJmIjFw3Qk5cW7R+CZ3IJo=
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
github.com/grpc-ecosystem/grpc-gateway v1.5.0 h1:WcmKMm43DR7RdtlkEXQJyo5ws8iTp98CyhCCbOHMvNI=
github.com/grpc-ecosystem/grpc-gateway v1.5.0/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw=
Expand All @@ -307,8 +309,8 @@ github.com/hashicorp/golang-lru v1.0.2 h1:dV3g9Z/unq5DpblPpw+Oqcv4dU/1omnb4Ok8iP
github.com/hashicorp/golang-lru v1.0.2/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/henvic/httpretty v0.1.3 h1:4A6vigjz6Q/+yAfTD4wqipCv+Px69C7Th/NhT0ApuU8=
github.com/henvic/httpretty v0.1.3/go.mod h1:UUEv7c2kHZ5SPQ51uS3wBpzPDibg2U3Y+IaXyHy5GBg=
github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU=
github.com/henvic/httpretty v0.1.4/go.mod h1:Dn60sQTZfbt2dYsdUSNsCljyF4AfdqnuJFDLJA1I4AM=
github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
Expand Down Expand Up @@ -660,6 +662,8 @@ github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUc
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 h1:18kd+8ZUlt/ARXhljq+14TwAoKa61q6dX8jtwOf6DH8=
github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529/go.mod h1:qe5TWALJ8/a1Lqznoc5BDHpYX/8HU60Hm2AwRmqzxqA=
github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
github.com/rs/zerolog v1.33.0 h1:1cU2KZkvPxNyfgEmhHAz/1A9Bz+llsdYzklWFzgp0r8=
github.com/rs/zerolog v1.33.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss=
Expand Down Expand Up @@ -712,8 +716,8 @@ github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic
github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b h1:e9eeuSYSLmUKxy7ALzKcxo7ggTceQaVcBhjDIcewa9c=
github.com/smallnest/ringbuffer v0.0.0-20240423223918-bab516b2000b/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d h1:3VwvTjiRPA7cqtgOWddEL+JrcijMlXUmj99c/6YyZoY=
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
github.com/smartystreets/assertions v1.2.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo=
github.com/smartystreets/assertions v1.13.0 h1:Dx1kYM01xsSqKPno3aqLnrwac2LetPvN23diwyr69Qs=
github.com/smartystreets/assertions v1.13.0/go.mod h1:wDmR7qL282YbGsPy6H/yAsesrxfxaaSlJazyFLYVFx8=
Expand Down
52 changes: 52 additions & 0 deletions pkg/xsysinfo/gguf.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package xsysinfo

import (
"errors"

gguf "github.com/gpustack/gguf-parser-go"
)

type VRAMEstimate struct {
TotalVRAM uint64
AvailableVRAM uint64
ModelSize uint64
EstimatedLayers int
EstimatedVRAM uint64
IsFullOffload bool
}

func EstimateGGUFVRAMUsage(f *gguf.GGUFFile, availableVRAM uint64) (*VRAMEstimate, error) {
// Get model metadata
m := f.Metadata()
a := f.Architecture()

// Calculate base model size
modelSize := uint64(m.Size)

if a.BlockCount == 0 {
return nil, errors.New("block count is 0")
}

// Estimate number of layers that can fit in VRAM
// Each layer typically requires about 1/32 of the model size
layerSize := modelSize / uint64(a.BlockCount)
estimatedLayers := int(availableVRAM / layerSize)

// If we can't fit even one layer, we need to do full offload
isFullOffload := estimatedLayers <= 0
if isFullOffload {
estimatedLayers = 0
}

// Calculate estimated VRAM usage
estimatedVRAM := uint64(estimatedLayers) * layerSize

return &VRAMEstimate{
TotalVRAM: availableVRAM,
AvailableVRAM: availableVRAM,
ModelSize: modelSize,
EstimatedLayers: estimatedLayers,
EstimatedVRAM: estimatedVRAM,
IsFullOffload: isFullOffload,
}, nil
}
16 changes: 16 additions & 0 deletions pkg/xsysinfo/gpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,22 @@ func GPUs() ([]*gpu.GraphicsCard, error) {
return gpu.GraphicsCards, nil
}

func TotalAvailableVRAM() (uint64, error) {
gpus, err := GPUs()
if err != nil {
return 0, err
}

var totalVRAM uint64
for _, gpu := range gpus {
if gpu.Node.Memory.TotalUsableBytes > 0 {
totalVRAM += uint64(gpu.Node.Memory.TotalUsableBytes)
}
}

return totalVRAM, nil
}

func HasGPU(vendor string) bool {
gpus, err := GPUs()
if err != nil {
Expand Down
Loading