Add one-liner installer+runner for llama.cpp on fresh DGX Spark

This commit is contained in:
koide 2026-03-05 00:18:28 +00:00
parent d8b3ac9102
commit c619b26c2f


@@ -0,0 +1,97 @@
#!/usr/bin/env bash
set -euo pipefail
# install-and-run-llama-qwen35-122b.sh
# For a fresh DGX Spark environment: build llama.cpp and launch llama-server (Qwen3.5-122B)
#
# One-liner:
# curl -sL https://www.techswan.online/scripts/install-and-run-llama-qwen35-122b.sh | bash
REPO_DIR="${REPO_DIR:-$HOME/llama.cpp}"
BUILD_DIR="${BUILD_DIR:-build-max}"
JOBS="${JOBS:-$(nproc)}"
MODEL="${MODEL:-unsloth/Qwen3.5-122B-A10B-GGUF:Q4_K_M}"
PORT="${PORT:-8080}"
CTX="${CTX:-204800}"
BATCH="${BATCH:-512}"
UBATCH="${UBATCH:-256}"
THREADS="${THREADS:-8}"
PARALLEL="${PARALLEL:-1}"
CACHE_K="${CACHE_K:-q8_0}"
CACHE_V="${CACHE_V:-q8_0}"
SWA_FULL="${SWA_FULL:-1}"
NGL="${NGL:-999}"
LOG_FILE="${LOG_FILE:-/tmp/llama-qwen35-122b.log}"
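# All of the variables above can be overridden from the environment; the names match the
# assignments above, and the values in these examples are only illustrative:
#   curl -sL https://www.techswan.online/scripts/install-and-run-llama-qwen35-122b.sh | PORT=8081 CTX=65536 bash
# or, with a local copy of this script:
#   PORT=8081 CTX=65536 ./install-and-run-llama-qwen35-122b.sh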
echo "[1/5] Install build deps"
sudo apt-get update -y
sudo apt-get install -y git cmake build-essential libssl-dev curl
echo "[2/5] Clone/Update llama.cpp"
if [[ -d "$REPO_DIR/.git" ]]; then
git -C "$REPO_DIR" pull --ff-only
else
git clone https://github.com/ggml-org/llama.cpp.git "$REPO_DIR"
fi
echo "[3/5] Configure + Build (CUDA ON)"
cmake -S "$REPO_DIR" -B "$REPO_DIR/$BUILD_DIR" -DGGML_CUDA=ON -DLLAMA_OPENSSL=ON
cmake --build "$REPO_DIR/$BUILD_DIR" -j "$JOBS" --target llama-server llama-cli
BIN="$REPO_DIR/$BUILD_DIR/bin/llama-server"
if [[ ! -x "$BIN" ]]; then
echo "[ERROR] build succeeded but llama-server not found: $BIN" >&2
exit 1
fi
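# Optional sanity check of the freshly built binary (output format depends on the llama.cpp revision):
#   "$BIN" --version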
echo "[4/5] Write run helper"
cat > "$HOME/run-llama-qwen35-122b.sh" <<EOF
#!/usr/bin/env bash
set -euo pipefail
MODEL="${MODEL}"
PORT="${PORT}"
CTX="${CTX}"
BATCH="${BATCH}"
UBATCH="${UBATCH}"
THREADS="${THREADS}"
PARALLEL="${PARALLEL}"
CACHE_K="${CACHE_K}"
CACHE_V="${CACHE_V}"
SWA_FULL="${SWA_FULL}"
NGL="${NGL}"
LOG_FILE="${LOG_FILE}"
pkill -f "llama-server.*Qwen3.5-122B-A10B" 2>/dev/null || true
sleep 1
ARGS=(
-hf "\$MODEL"
--host 0.0.0.0 --port "\$PORT"
-ngl "\$NGL"
-c "\$CTX" -b "\$BATCH" -ub "\$UBATCH"
-t "\$THREADS"
--parallel "\$PARALLEL"
--cache-type-k "\$CACHE_K"
--cache-type-v "\$CACHE_V"
--flash-attn on
)
if [[ "\$SWA_FULL" == "1" ]]; then
ARGS+=(--swa-full)
fi
nohup "$BIN" "\${ARGS[@]}" > "\$LOG_FILE" 2>&1 &
echo "started: pid=\$! log=\$LOG_FILE"
EOF
chmod +x "$HOME/run-llama-qwen35-122b.sh"
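# The helper can be re-run at any time to restart the server: it first kills any running
# Qwen3.5-122B llama-server instance (the pkill in the generated script) and then starts
# a fresh one with the settings baked in above.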
echo "[5/5] Start server"
"$HOME/run-llama-qwen35-122b.sh"
echo "[INFO] Waiting health..."
for i in {1..40}; do
if curl -fsS "http://127.0.0.1:${PORT}/health" >/dev/null 2>&1; then
echo "[OK] health ready: http://127.0.0.1:${PORT}/health"
exit 0
fi
sleep 3
done
echo "[WARN] still loading model. check logs: tail -f ${LOG_FILE}"