环境配置
| IP | nvidia-smi版本 | cuda版本 | 操作系统 | 显卡 | 
|---|---|---|---|---|
| 192.168.10.30 | 550.120 | 12.4 | Ubuntu 22.04 LTS | NVIDIA A10 | 
一.安装显卡驱动
1.查看显卡
root@gpu:~# apt update
root@gpu:~# lspci | grep -i nvidia
00:07.0 3D controller: NVIDIA Corporation GA102GL [A10] (rev a1)
2.安装驱动
#查看系统推荐的驱动版本,这里推荐的是550驱动
root@gpu:~# ubuntu-drivers devices
model    : GA102GL [A10]
driver   : nvidia-driver-550 - distro non-free recommended
#安装550驱动
apt install -y nvidia-driver-550
3.查看安装好的驱动
root@gpu:~# nvidia-smi
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.120                Driver Version: 550.120        CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|=========================================+========================+======================|
|   0  NVIDIA A10                     Off |   00000000:00:07.0 Off |                  Off |
|  0%   36C    P8             10W /  150W |       1MiB /  24564MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
+-----------------------------------------------------------------------------------------+
| Processes:                                                                              |
|  GPU   GI   CI        PID   Type   Process name                              GPU Memory |
|        ID   ID                                                               Usage      |
|=========================================================================================|
|  No running processes found                                                             |
+-----------------------------------------------------------------------------------------+
二.安装cuda toolkit
因为安装的nvidia驱动版本是550,所以我们安装与之对应的cuda 12.4
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
dpkg -i cuda-keyring_1.1-1_all.deb
apt-get update
apt-get -y install cuda-toolkit-12-4
root@gpu:~# vim /etc/profile
export PATH=$PATH:/usr/local/cuda-12.4/bin
root@gpu:~# source /etc/profile
三.重启
reboot
四.安装docker
apt install -y apt-transport-https ca-certificates curl software-properties-common
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
apt update
apt install -y docker-ce docker-ce-cli containerd.io
systemctl start docker
systemctl enable docker
五.安装 nvidia-container-toolkit
1.安装nvidia-container-toolkit
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
  && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
    sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
    sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
sed -i -e '/experimental/ s/^#//g' /etc/apt/sources.list.d/nvidia-container-toolkit.list
apt-get update
apt-get install -y nvidia-container-toolkit
2.使用nvidia-ctk配置容器运行时为docker
nvidia-ctk runtime configure --runtime=docker
systemctl restart docker
3.配置镜像仓库(注意:上一步 nvidia-ctk 已在 daemon.json 中写入 runtimes 配置,手动编辑时必须保留该部分,否则容器将无法使用 GPU)
root@gpu:~# vim /etc/docker/daemon.json
{
    "registry-mirrors": [
            "https://docker.211678.top",
            "https://docker.1panel.live",
            "https://hub.rat.dev",
            "https://docker.m.daocloud.io",
            "https://do.nark.eu.org",
            "https://dockerpull.com",
            "https://dockerproxy.cn",
            "https://docker.awsl9527.cn"
      ],
    "runtimes": {
        "nvidia": {
            "args": [],
            "path": "nvidia-container-runtime"
        }
    }
}
root@gpu:~# systemctl daemon-reload
root@gpu:~# systemctl restart docker
六.部署deepseek-R1-7B
1.拉取vllm镜像
#官方镜像
vllm/vllm-openai:latest
ghcr.io/open-webui/open-webui:cuda
#自建阿里云镜像仓库
registry.cn-beijing.aliyuncs.com/tools_cluster/tools_cluster:vllm-openai
registry.cn-beijing.aliyuncs.com/tools_cluster/tools_cluster:open-webui-cuda
docker pull registry.cn-beijing.aliyuncs.com/tools_cluster/tools_cluster:vllm-openai
docker pull registry.cn-beijing.aliyuncs.com/tools_cluster/tools_cluster:open-webui-cuda
2.下载deepseek-R1-7B模型(二选一)
2.1modelscope
https://modelscope.cn/home
mkdir /root/model
pip install modelscope setuptools
modelscope download --model deepseek-ai/DeepSeek-R1-Distill-Qwen-7B --local_dir /root/model
2.2huggingface
https://huggingface.co/
pip install huggingface_hub
mkdir /root/model
export HF_ENDPOINT=https://hf-mirror.com
huggingface-cli download --resume-download deepseek-ai/DeepSeek-R1-Distill-Qwen-7B --local-dir /root/model
3.部署vllm-openai
https://github.com/docker/compose/releases
#先从上面的 releases 页面下载 docker-compose 二进制文件并放到 /usr/local/bin (以 v2.29.2 为例)
curl -L https://github.com/docker/compose/releases/download/v2.29.2/docker-compose-linux-x86_64 -o /usr/local/bin/docker-compose
chmod +x /usr/local/bin/docker-compose
root@gpu:~# vim docker-compose.yml
version: '3'
services:
  vllm_service:
    image: registry.cn-beijing.aliyuncs.com/tools_cluster/tools_cluster:vllm-openai
    container_name: vllm_deepseek_7b
    restart: always
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    ports:
      - "8000:8000"
    volumes:
      - /root/model:/app/model
    command: [
      "--served-model-name", "DeepSeek-R1:7B",
      "--trust-remote-code",
      "--enforce-eager",
      "--enable-chunked-prefill",
      "--gpu-memory-utilization", "0.9",
      "--model", "/app/model/",
      "--host", "0.0.0.0",
      "--port", "8000",
      "--max-model-len", "10000",
      "--api-key", "12345678",
      "--tokenizer", "/app/model/",
      "--tensor-parallel-size", "1"
    ]
root@gpu:~# docker-compose up -d
4.部署open-webui
docker run -d \
  --name openwebui-container \
  --network host \
  --gpus all \
  -e OPENAI_API_BASE_URL=http://192.168.10.30:8000/v1 \
  -e OPENAI_API_KEYS=12345678 \
  -e USE_CUDA_DOCKER=true \
  registry.cn-beijing.aliyuncs.com/tools_cluster/tools_cluster:open-webui-cuda
5.网页查看

6.网页问问题后查看日志


7.查看显卡使用率
root@gpu:~# nvidia-smi
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.120                Driver Version: 550.120        CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|=========================================+========================+======================|
|   0  NVIDIA A10                     On  |   00000000:00:07.0 Off |                  Off |
|  0%   59C    P0            149W /  150W |   21805MiB /  24564MiB |     98%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
+-----------------------------------------------------------------------------------------+
| Processes:                                                                              |
|  GPU   GI   CI        PID   Type   Process name                              GPU Memory |
|        ID   ID                                                               Usage      |
|=========================================================================================|
|    0   N/A  N/A     15903      C   python3                                     21796MiB |
+-----------------------------------------------------------------------------------------+
                      
                      
                    
            
          
            
评论区