Add one-liner installer+runner for llama.cpp on fresh DGX Spark
All checks were successful
Deploy Docusaurus Site / deploy (push) Successful in 51s
All checks were successful
Deploy Docusaurus Site / deploy (push) Successful in 51s
This commit is contained in:
parent
d8b3ac9102
commit
c619b26c2f
97
static/scripts/install-and-run-llama-qwen35-122b.sh
Executable file
97
static/scripts/install-and-run-llama-qwen35-122b.sh
Executable file
@ -0,0 +1,97 @@
|
|||||||
|
#!/usr/bin/env bash
set -euo pipefail

# install-and-run-llama-qwen35-122b.sh
# For a fresh DGX Spark environment: build llama.cpp and start llama-server (Qwen3.5-122B).
#
# One-liner:
#   curl -sL https://www.techswan.online/scripts/install-and-run-llama-qwen35-122b.sh | bash

# --- Build configuration (every value may be overridden via the environment) ---
: "${REPO_DIR:=$HOME/llama.cpp}"   # checkout location for llama.cpp
: "${BUILD_DIR:=build-max}"        # cmake build dir, relative to the repo
: "${JOBS:=$(nproc)}"              # parallel build jobs

# --- Server configuration (every value may be overridden via the environment) ---
: "${MODEL:=unsloth/Qwen3.5-122B-A10B-GGUF:Q4_K_M}"   # Hugging Face model ref for -hf
: "${PORT:=8080}"                  # llama-server listen port
: "${CTX:=204800}"                 # context size (-c)
: "${BATCH:=512}"                  # logical batch size (-b)
: "${UBATCH:=256}"                 # physical batch size (-ub)
: "${THREADS:=8}"                  # CPU threads (-t)
: "${PARALLEL:=1}"                 # concurrent request slots (--parallel)
: "${CACHE_K:=q8_0}"               # KV-cache key quantization (--cache-type-k)
: "${CACHE_V:=q8_0}"               # KV-cache value quantization (--cache-type-v)
: "${SWA_FULL:=1}"                 # "1" => pass --swa-full to the server
: "${NGL:=999}"                    # GPU layers (-ngl); 999 effectively offloads all
: "${LOG_FILE:=/tmp/llama-qwen35-122b.log}"   # server log destination
echo "[1/5] Install build deps"
|
||||||
|
sudo apt-get update -y
|
||||||
|
sudo apt-get install -y git cmake build-essential libssl-dev curl
|
||||||
|
|
||||||
|
echo "[2/5] Clone/Update llama.cpp"
|
||||||
|
if [[ -d "$REPO_DIR/.git" ]]; then
|
||||||
|
git -C "$REPO_DIR" pull --ff-only
|
||||||
|
else
|
||||||
|
git clone https://github.com/ggml-org/llama.cpp.git "$REPO_DIR"
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "[3/5] Configure + Build (CUDA ON)"
|
||||||
|
cmake -S "$REPO_DIR" -B "$REPO_DIR/$BUILD_DIR" -DGGML_CUDA=ON -DLLAMA_OPENSSL=ON
|
||||||
|
cmake --build "$REPO_DIR/$BUILD_DIR" -j "$JOBS" --target llama-server llama-cli
|
||||||
|
|
||||||
|
BIN="$REPO_DIR/$BUILD_DIR/bin/llama-server"
|
||||||
|
if [[ ! -x "$BIN" ]]; then
|
||||||
|
echo "[ERROR] build succeeded but llama-server not found: $BIN" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "[4/5] Write run helper"
|
||||||
|
cat > "$HOME/run-llama-qwen35-122b.sh" <<EOF
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
MODEL="${MODEL}"
|
||||||
|
PORT="${PORT}"
|
||||||
|
CTX="${CTX}"
|
||||||
|
BATCH="${BATCH}"
|
||||||
|
UBATCH="${UBATCH}"
|
||||||
|
THREADS="${THREADS}"
|
||||||
|
PARALLEL="${PARALLEL}"
|
||||||
|
CACHE_K="${CACHE_K}"
|
||||||
|
CACHE_V="${CACHE_V}"
|
||||||
|
SWA_FULL="${SWA_FULL}"
|
||||||
|
NGL="${NGL}"
|
||||||
|
LOG_FILE="${LOG_FILE}"
|
||||||
|
pkill -f "llama-server.*Qwen3.5-122B-A10B" 2>/dev/null || true
|
||||||
|
sleep 1
|
||||||
|
ARGS=(
|
||||||
|
-hf "\$MODEL"
|
||||||
|
--host 0.0.0.0 --port "\$PORT"
|
||||||
|
-ngl "\$NGL"
|
||||||
|
-c "\$CTX" -b "\$BATCH" -ub "\$UBATCH"
|
||||||
|
-t "\$THREADS"
|
||||||
|
--parallel "\$PARALLEL"
|
||||||
|
--cache-type-k "\$CACHE_K"
|
||||||
|
--cache-type-v "\$CACHE_V"
|
||||||
|
--flash-attn on
|
||||||
|
)
|
||||||
|
if [[ "\$SWA_FULL" == "1" ]]; then
|
||||||
|
ARGS+=(--swa-full)
|
||||||
|
fi
|
||||||
|
nohup "$BIN" "\${ARGS[@]}" > "\$LOG_FILE" 2>&1 &
|
||||||
|
echo "started: pid=\$! log=\$LOG_FILE"
|
||||||
|
EOF
|
||||||
|
chmod +x "$HOME/run-llama-qwen35-122b.sh"
|
||||||
|
|
||||||
|
echo "[5/5] Start server"
|
||||||
|
"$HOME/run-llama-qwen35-122b.sh"
|
||||||
|
|
||||||
|
echo "[INFO] Waiting health..."
|
||||||
|
for i in {1..40}; do
|
||||||
|
if curl -fsS "http://127.0.0.1:${PORT}/health" >/dev/null 2>&1; then
|
||||||
|
echo "[OK] health ready: http://127.0.0.1:${PORT}/health"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
sleep 3
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "[WARN] still loading model. check logs: tail -f ${LOG_FILE}"
|
||||||
Loading…
x
Reference in New Issue
Block a user