Skip to content

Latest commit

 

History

History
204 lines (142 loc) · 5.61 KB

File metadata and controls

204 lines (142 loc) · 5.61 KB

Ollama手动安装与使用

  • 下载安装包,上传至服务器

  • 解压安装包

      # tar -zxf ollama-linux-amd64.tgz -C /usr/local
      # tar -xf ollama-linux-amd64.tar.zst -C /usr/local
    
  • 添加环境变量

      # vim /root/.bashrc
      export OLLAMA_HOST=http://127.0.0.1:11434
      export OLLAMA_MODELS=/home/main_data/ollama/models
    
  • 创建开机自启配置

      # vim  /etc/systemd/system/ollama.service
      [Unit]
      Description=Ollama Service
      After=network-online.target
      
      [Service]
      ExecStart=/usr/local/bin/ollama serve
      User=ollama
      Group=ollama
      Restart=always
      RestartSec=3
      Environment="PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin"
      
      [Install]
      WantedBy=default.target
    
  • 创建ollama用户

      # useradd -m ollama
    
  • 设置开机启动

      # systemctl enable ollama
    
  • 配置启动环境变量

      # systemctl set-environment OLLAMA_HOST=http://127.0.0.1:11434
    
  • 启动服务

      # systemctl start ollama
    
  • 查看服务状态

      # systemctl status ollama
      ● ollama.service - Ollama Service
           Loaded: loaded (/etc/systemd/system/ollama.service; enabled; vendor preset: enabled)
           Active: active (running) since Mon 2025-03-17 06:46:01 UTC; 8min ago
         Main PID: 7119 (ollama)
            Tasks: 17 (limit: 154112)
           Memory: 22.8M
              CPU: 388ms
           CGroup: /system.slice/ollama.service
                   └─7119 /usr/local/bin/ollama serve
      
      Mar 17 06:46:01 jicheng systemd[1]: Started Ollama Service.
      Mar 17 06:46:01 jicheng ollama[7119]: 2025/03/17 06:46:01 routes.go:1230: INFO server config env="map[CUDA_VISIBLE_DEVICES: GPU_DEVI>
      Mar 17 06:46:01 jicheng ollama[7119]: time=2025-03-17T06:46:01.440Z level=INFO source=images.go:432 msg="total blobs: 0"
      Mar 17 06:46:01 jicheng ollama[7119]: time=2025-03-17T06:46:01.440Z level=INFO source=images.go:439 msg="total unused blobs removed:>
      Mar 17 06:46:01 jicheng ollama[7119]: time=2025-03-17T06:46:01.441Z level=INFO source=routes.go:1297 msg="Listening on 127.0.0.1:114>
      Mar 17 06:46:01 jicheng ollama[7119]: time=2025-03-17T06:46:01.441Z level=INFO source=gpu.go:217 msg="looking for compatible GPUs"
      Mar 17 06:46:01 jicheng ollama[7119]: time=2025-03-17T06:46:01.811Z level=INFO source=types.go:130 msg="inference compute" id=GPU-96>
      Mar 17 06:46:01 jicheng ollama[7119]: time=2025-03-17T06:46:01.811Z level=INFO source=types.go:130 msg="inference compute" id=GPU-be>
      Mar 17 06:46:28 jicheng ollama[7119]: [GIN] 2025/03/17 - 06:46:28 | 200 |      29.614µs |       127.0.0.1 | GET      "/api/version"
    
  • 如果异常则查看日志(以 ollama 服务为例)

      # journalctl -u ollama.service | tail -n 10
    
  • 查看版本

      # ollama -v
      ollama version is 0.6.1
    
  • 拉取模型

      # OLLAMA_MIRROR="https://registry.ollama.ai" ollama pull deepseek-r1:7b
      # ollama run qwen2.5:7b
    
  • 查看正在运行的模型

      # ollama ps
      NAME               ID              SIZE     PROCESSOR    UNTIL              
      deepseek-r1:70b    0c1615a8ca32    49 GB    100% GPU     4 minutes from now
    

    查询模型和参数

    • 去Ollama平台搜索

    • 拉取示例

      • 下载模型并进入交互式对话(或执行一次推理)

          # ollama run qwen2.5vl
      • 指定模型具体参数(tag)

        # ollama run qwen2.5vl:7b
      • 仅负责下载模型到本地

        # ollama pull qwen2.5vl:7b
  • 查看已安装模型的详细参数

    root@gpu:~# ollama show qwen3.5:27b
      Model
        architecture        qwen35    
        parameters          27.8B     
        context length      262144    
        embedding length    5120      
        quantization        Q4_K_M    
        requires            0.17.1    
    
      Capabilities
        completion    
        vision        
        tools         
        thinking      
    
      Parameters
        top_p               0.95    
        presence_penalty    1.5     
        temperature         1       
        top_k               20      
    
      License
        Apache License               
        Version 2.0, January 2004    
        ...                          
    
    root@gpu:~# 
    
  • 查看当前运行模型的状态

    root@gpu:~# ollama ps
    NAME           ID              SIZE     PROCESSOR    CONTEXT    UNTIL   
    qwen3.5:27b    7653528ba5cb    26 GB    100% GPU     32768      Forever    
    
  • 关于模型的上下文context

    • Ollama 会根据 GPU 显存大小自动决定默认上下文,规则如下:

      GPU 显存       默认上下文
      < 24 GiB       4k
      24–48 GiB      32k
      ≥ 48 GiB       256k

      • PS:即使模型本身支持更大的值,Ollama 也会选择保守值以防显存溢出。

      • Ollama v0.5.13 开始,支持通过环境变量 OLLAMA_CONTEXT_LENGTH 修改全局默认值。

      • 因此修改配置文件,可以把默认的32k改成64k。

        root@gpu:~# cat /etc/systemd/system/ollama.service
        [Unit]
        Description=Ollama Service
        After=network-online.target
        
        [Service]
        ExecStart=/usr/local/bin/ollama serve
        User=ollama
        Group=ollama
        Restart=always
        RestartSec=3
        Environment="PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin"
        Environment="OLLAMA_HOST=0.0.0.0:11434"
        Environment="OLLAMA_NUM_GPU=2"
        Environment="OLLAMA_NUM_THREADS=8"
        Environment="OLLAMA_CONTEXT_LENGTH=65536"
        
        [Install]
        WantedBy=default.target
        root@gpu:~#