note-articles/static/scripts/dgx-spark-qwen3-coder.sh
koide fd7fe6f991
All checks were successful
Deploy Docusaurus Site / deploy (push) Successful in 26s
Add: DGX SparkでQwen3-Coder-Next(80B MoE)を動かす
2026-02-19 11:31:37 +00:00

72 lines
2.1 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# DGX Spark - Qwen3-Coder-Next-FP8 launch script
# https://docs.techswan.online/tech/dgx-spark-qwen3-coder-next/
#
# Starts a vLLM OpenAI-compatible server for Qwen/Qwen3-Coder-Next-FP8
# inside the NGC vLLM container. Tunables can be overridden via env vars:
#   PORT           host port to expose (default 8000)
#   MAX_MODEL_LEN  max context length (default 32768)
#   MAX_NUM_SEQS   max concurrent sequences (default 32)
#   GPU_MEM_UTIL   GPU memory utilization as a FRACTION 0-1 (default 0.85)
set -euo pipefail

readonly CONTAINER_NAME="qwen3-coder-next-fp8"
readonly IMAGE="nvcr.io/nvidia/vllm:25.11-py3"
readonly MODEL="Qwen/Qwen3-Coder-Next-FP8"
PORT="${PORT:-8000}"
MAX_MODEL_LEN="${MAX_MODEL_LEN:-32768}"
MAX_NUM_SEQS="${MAX_NUM_SEQS:-32}"
GPU_MEM_UTIL="${GPU_MEM_UTIL:-0.85}"

echo "=== DGX Spark - Qwen3-Coder-Next-FP8 起動スクリプト ==="
echo ""

# Stop/remove any stale container of the same name so 'docker run' cannot
# fail with a name conflict. The '|| true' keeps this best-effort.
if docker ps -a --format '{{.Names}}' | grep -q "^${CONTAINER_NAME}$"; then
  echo "⚠️ 既存コンテナを停止・削除..."
  docker stop "$CONTAINER_NAME" 2>/dev/null || true
  docker rm "$CONTAINER_NAME" 2>/dev/null || true
fi

# Pull the NGC vLLM image only when it is not already present locally.
if ! docker images --format '{{.Repository}}:{{.Tag}}' | grep -q "^${IMAGE}$"; then
  echo "📦 NGC vLLMイメージを取得中..."
  docker pull "$IMAGE"
fi

echo ""
echo "🚀 コンテナ起動..."
echo " Model: $MODEL"
echo " Port: $PORT"
echo " Max Context: $MAX_MODEL_LEN"
# GPU_MEM_UTIL is a 0-1 fraction fed to --gpu-memory-utilization; the old
# "%" suffix made 0.85 read as "0.85%", which was misleading.
echo " GPU Memory Utilization: ${GPU_MEM_UTIL}"
echo ""

# --ipc=host: vLLM needs large shared memory segments.
# HF cache is bind-mounted so downloaded weights survive container recreation.
docker run -d --name "$CONTAINER_NAME" \
  --gpus all \
  -p "${PORT}:8000" \
  -v "$HOME/.cache/huggingface:/root/.cache/huggingface" \
  --ipc=host \
  "$IMAGE" \
  vllm serve "$MODEL" \
    --max-model-len "$MAX_MODEL_LEN" \
    --max-num-seqs "$MAX_NUM_SEQS" \
    --gpu-memory-utilization "$GPU_MEM_UTIL" \
    --trust-remote-code

echo ""
echo "✅ コンテナ起動しました!"
echo ""
echo "📋 ログ確認:"
echo " docker logs -f $CONTAINER_NAME"
echo ""
echo "🔍 起動確認 (Application startup complete が出るまで待つ):"
echo " 初回起動は15-20分かかります (モデルダウンロード + ロード)"
echo ""
echo "🧪 テスト:"
echo " curl http://localhost:${PORT}/health"
echo ""
echo "💬 チャット:"
# Quoted delimiter: heredoc body is printed literally, no expansion.
cat << 'CURL_EXAMPLE'
curl http://localhost:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "Qwen/Qwen3-Coder-Next-FP8",
"messages": [{"role": "user", "content": "Hello!"}]
}'
CURL_EXAMPLE