note-articles/static/scripts/install-and-run-llama-qwen35-122b.sh
koide c619b26c2f
All checks were successful
Deploy Docusaurus Site / deploy (push) Successful in 51s
Add one-liner installer+runner for llama.cpp on fresh DGX Spark
2026-03-05 00:18:28 +00:00

98 lines
2.5 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
set -euo pipefail
# install-and-run-llama-qwen35-122b.sh
# For a fresh DGX Spark environment: build llama.cpp and launch llama-server (Qwen3.5-122B).
#
# One-liner:
# curl -sL https://www.techswan.online/scripts/install-and-run-llama-qwen35-122b.sh | bash
# --- Configuration -----------------------------------------------------------
# Every setting below may be overridden from the environment before running,
# e.g.:  PORT=9090 CTX=65536 ./install-and-run-llama-qwen35-122b.sh
# `: "${VAR:=default}"` assigns the default only when VAR is unset or empty.
: "${REPO_DIR:=$HOME/llama.cpp}"                         # clone destination
: "${BUILD_DIR:=build-max}"                              # cmake build dir (under REPO_DIR)
: "${JOBS:=$(nproc)}"                                    # parallel build jobs
: "${MODEL:=unsloth/Qwen3.5-122B-A10B-GGUF:Q4_K_M}"      # HuggingFace repo:quant for -hf
: "${PORT:=8080}"                                        # llama-server listen port
: "${CTX:=204800}"                                       # context window (tokens)
: "${BATCH:=512}"                                        # logical batch size (-b)
: "${UBATCH:=256}"                                       # physical batch size (-ub)
: "${THREADS:=8}"                                        # CPU threads (-t)
: "${PARALLEL:=1}"                                       # concurrent request slots
: "${CACHE_K:=q8_0}"                                     # KV cache key quantization
: "${CACHE_V:=q8_0}"                                     # KV cache value quantization
: "${SWA_FULL:=1}"                                       # 1 => pass --swa-full
: "${NGL:=999}"                                          # GPU layers to offload (999 = all)
: "${LOG_FILE:=/tmp/llama-qwen35-122b.log}"              # server stdout/stderr log
echo "[1/5] Install build deps"
sudo apt-get update -y
# libcurl4-openssl-dev is required: llama.cpp's CMake builds with LLAMA_CURL=ON
# by default, and libcurl is what powers the `-hf` HuggingFace model download
# used by the run helper. Without the dev headers, configure/build fails.
sudo apt-get install -y git cmake build-essential libssl-dev libcurl4-openssl-dev curl
echo "[2/5] Clone/Update llama.cpp"
if [[ -d "$REPO_DIR/.git" ]]; then
  # Existing checkout: fast-forward only, so local edits never get merged over.
  git -C "$REPO_DIR" pull --ff-only
else
  git clone https://github.com/ggml-org/llama.cpp.git "$REPO_DIR"
fi
echo "[3/5] Configure + Build (CUDA ON)"
# Out-of-tree build under the repo; CUDA backend enabled for DGX GPUs.
build_path="$REPO_DIR/$BUILD_DIR"
cmake_flags=(-DGGML_CUDA=ON -DLLAMA_OPENSSL=ON)
cmake -S "$REPO_DIR" -B "$build_path" "${cmake_flags[@]}"
cmake --build "$build_path" -j "$JOBS" --target llama-server llama-cli
# BIN is consumed later when the run helper is generated.
BIN="$build_path/bin/llama-server"
if [[ ! -x "$BIN" ]]; then
  echo "[ERROR] build succeeded but llama-server not found: $BIN" >&2
  exit 1
fi
echo "[4/5] Write run helper"
# Generate a standalone restart helper at $HOME/run-llama-qwen35-122b.sh.
# NOTE: the heredoc delimiter is UNquoted, so the plain ${VAR} expansions
# below are resolved NOW (baking the current config snapshot into the file),
# while the \$-escaped ones are written literally and only evaluated when the
# helper itself runs. "$BIN" on the nohup line is likewise baked in now.
cat > "$HOME/run-llama-qwen35-122b.sh" <<EOF
#!/usr/bin/env bash
set -euo pipefail
MODEL="${MODEL}"
PORT="${PORT}"
CTX="${CTX}"
BATCH="${BATCH}"
UBATCH="${UBATCH}"
THREADS="${THREADS}"
PARALLEL="${PARALLEL}"
CACHE_K="${CACHE_K}"
CACHE_V="${CACHE_V}"
SWA_FULL="${SWA_FULL}"
NGL="${NGL}"
LOG_FILE="${LOG_FILE}"
pkill -f "llama-server.*Qwen3.5-122B-A10B" 2>/dev/null || true
sleep 1
ARGS=(
-hf "\$MODEL"
--host 0.0.0.0 --port "\$PORT"
-ngl "\$NGL"
-c "\$CTX" -b "\$BATCH" -ub "\$UBATCH"
-t "\$THREADS"
--parallel "\$PARALLEL"
--cache-type-k "\$CACHE_K"
--cache-type-v "\$CACHE_V"
--flash-attn on
)
if [[ "\$SWA_FULL" == "1" ]]; then
ARGS+=(--swa-full)
fi
nohup "$BIN" "\${ARGS[@]}" > "\$LOG_FILE" 2>&1 &
echo "started: pid=\$! log=\$LOG_FILE"
EOF
# Make the generated helper directly runnable (it is also invoked below).
chmod +x "$HOME/run-llama-qwen35-122b.sh"
echo "[5/5] Start server"
"$HOME/run-llama-qwen35-122b.sh"
echo "[INFO] Waiting health..."
# Poll /health for up to 40 * 3s = 120s. A 122B model can legitimately take
# longer than that to load, so hitting the timeout is reported as a warning
# on stderr but the script still exits 0 (best-effort wait, not a failure).
for _ in {1..40}; do
  if curl -fsS "http://127.0.0.1:${PORT}/health" >/dev/null 2>&1; then
    echo "[OK] health ready: http://127.0.0.1:${PORT}/health"
    exit 0
  fi
  sleep 3
done
echo "[WARN] still loading model. check logs: tail -f ${LOG_FILE}" >&2
exit 0