All checks were successful
Deploy Docusaurus Site / deploy (push) Successful in 26s
72 lines
2.1 KiB
Bash
72 lines
2.1 KiB
Bash
#!/bin/bash
#
# DGX Spark - Qwen3-Coder-Next-FP8 launch script
# https://docs.techswan.online/tech/dgx-spark-qwen3-coder-next/
#
# Starts the NGC vLLM container serving Qwen/Qwen3-Coder-Next-FP8 and prints
# follow-up commands (logs, health check, sample chat request).
#
# Tunable via environment variables:
#   PORT           host port to publish (default 8000)
#   MAX_MODEL_LEN  max context length in tokens (default 32768)
#   MAX_NUM_SEQS   max concurrent sequences (default 32)
#   GPU_MEM_UTIL   GPU memory utilization as a FRACTION 0-1 (default 0.85)

set -euo pipefail

readonly CONTAINER_NAME="qwen3-coder-next-fp8"
readonly IMAGE="nvcr.io/nvidia/vllm:25.11-py3"
readonly MODEL="Qwen/Qwen3-Coder-Next-FP8"
PORT="${PORT:-8000}"
MAX_MODEL_LEN="${MAX_MODEL_LEN:-32768}"
MAX_NUM_SEQS="${MAX_NUM_SEQS:-32}"
GPU_MEM_UTIL="${GPU_MEM_UTIL:-0.85}"

echo "=== DGX Spark - Qwen3-Coder-Next-FP8 起動スクリプト ==="
echo ""

# Stop and remove any existing container with the same name.
# -F -x: literal, whole-line match — the original regex ^name$ would let '.'
# in a name match any character.
if docker ps -a --format '{{.Names}}' | grep -Fxq -- "$CONTAINER_NAME"; then
  echo "⚠️ 既存コンテナを停止・削除..."
  # Best-effort: the container may already be stopped or gone.
  docker stop "$CONTAINER_NAME" 2>/dev/null || true
  docker rm "$CONTAINER_NAME" 2>/dev/null || true
fi

# Pull the NGC vLLM image only if it is not already present locally.
if ! docker images --format '{{.Repository}}:{{.Tag}}' | grep -Fxq -- "$IMAGE"; then
  echo "📦 NGC vLLMイメージを取得中..."
  docker pull "$IMAGE"
fi

echo ""
echo "🚀 コンテナ起動..."
echo " Model: $MODEL"
echo " Port: $PORT"
echo " Max Context: $MAX_MODEL_LEN"
# GPU_MEM_UTIL is a fraction (e.g. 0.85), not a percentage — do not append '%'.
echo " GPU Memory: ${GPU_MEM_UTIL}"
echo ""

# Launch vLLM in a detached container:
#   --gpus all   expose all GPUs
#   --ipc=host   share host IPC namespace (required for PyTorch shared memory)
#   -v ~/.cache/huggingface  reuse the host's model cache across restarts
docker run -d --name "$CONTAINER_NAME" \
  --gpus all \
  -p "${PORT}:8000" \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --ipc=host \
  "$IMAGE" \
  vllm serve "$MODEL" \
    --max-model-len "$MAX_MODEL_LEN" \
    --max-num-seqs "$MAX_NUM_SEQS" \
    --gpu-memory-utilization "$GPU_MEM_UTIL" \
    --trust-remote-code

echo ""
echo "✅ コンテナ起動しました!"
echo ""
echo "📋 ログ確認:"
echo " docker logs -f $CONTAINER_NAME"
echo ""
echo "🔍 起動確認(Application startup complete が出るまで待つ):"
echo " 初回起動は15-20分かかります(モデルダウンロード + ロード)"
echo ""
echo "🧪 テスト:"
echo " curl http://localhost:${PORT}/health"
echo ""
echo "💬 チャット:"
# Unquoted here-doc delimiter so ${PORT} and ${MODEL} expand — the example now
# tracks the configured port instead of hardcoding 8000. '\\' emits a literal
# backslash for the shell line continuations in the printed command.
cat << CURL_EXAMPLE
curl http://localhost:${PORT}/v1/chat/completions \\
  -H "Content-Type: application/json" \\
  -d '{
    "model": "${MODEL}",
    "messages": [{"role": "user", "content": "Hello!"}]
  }'
CURL_EXAMPLE