Add one-liner DGX script to run llama-server with Qwen3.5-122B
All checks were successful
Deploy Docusaurus Site / deploy (push) Successful in 56s
parent 421ace0650
commit d8b3ac9102
static/scripts/run-llama-qwen35-122b.sh (executable file, +95 lines)
@@ -0,0 +1,95 @@
#!/usr/bin/env bash
set -euo pipefail

# run-llama-qwen35-122b.sh
# Startup script for DGX Spark / llama.cpp (OpenAI-compatible API)
#
# One-liner:
#   curl -sL https://www.techswan.online/scripts/run-llama-qwen35-122b.sh | bash
#
# Env overrides:
#   MODEL="unsloth/Qwen3.5-122B-A10B-GGUF:Q4_K_M"
#   PORT=8080
#   CTX=204800
#   BATCH=512
#   UBATCH=256
#   THREADS=8
#   PARALLEL=1
#   CACHE_K=q8_0
#   CACHE_V=q8_0
#   SWA_FULL=1
#   NGL=999
#   KILL_OLD=1
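#
# Example override (values are illustrative; tune for your hardware):
#   PORT=9090 CTX=131072 KILL_OLD=0 bash run-llama-qwen35-122b.sh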

LLAMA_BIN="${LLAMA_BIN:-$HOME/llama.cpp/build-max/bin/llama-server}"
MODEL="${MODEL:-unsloth/Qwen3.5-122B-A10B-GGUF:Q4_K_M}"
PORT="${PORT:-8080}"
CTX="${CTX:-204800}"
BATCH="${BATCH:-512}"
UBATCH="${UBATCH:-256}"
THREADS="${THREADS:-8}"
PARALLEL="${PARALLEL:-1}"
CACHE_K="${CACHE_K:-q8_0}"
CACHE_V="${CACHE_V:-q8_0}"
SWA_FULL="${SWA_FULL:-1}"
NGL="${NGL:-999}"
KILL_OLD="${KILL_OLD:-1}"
LOG_FILE="${LOG_FILE:-/tmp/llama-qwen35-122b.log}"

if [[ ! -x "$LLAMA_BIN" ]]; then
  echo "[ERROR] llama-server not found: $LLAMA_BIN" >&2
  echo "        Build llama.cpp first (GGML_CUDA=ON)." >&2
  exit 1
fi
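
# A typical CUDA build that produces the default LLAMA_BIN path above
# (the build-max directory name is this script's convention; adjust to taste):
#   cmake -B build-max -DGGML_CUDA=ON
#   cmake --build build-max --config Release -j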

if [[ "$KILL_OLD" == "1" ]]; then
  pkill -f "llama-server.*Qwen3.5-122B-A10B" 2>/dev/null || true
  pkill -f "llama-server .*${PORT}" 2>/dev/null || true
  sleep 1
fi

ARGS=(
  -hf "$MODEL"                    # fetch/load the GGUF from Hugging Face (repo:quant)
  --host 0.0.0.0 --port "$PORT"
  -ngl "$NGL"                     # layers to offload to GPU (999 = all)
  -c "$CTX"                       # context length
  -b "$BATCH"
  -ub "$UBATCH"
  -t "$THREADS"
  --parallel "$PARALLEL"
  --cache-type-k "$CACHE_K"       # quantized KV cache
  --cache-type-v "$CACHE_V"
  --flash-attn on
)

if [[ "$SWA_FULL" == "1" ]]; then
  ARGS+=(--swa-full)              # full-size KV cache for sliding-window-attention layers
fi

echo "[INFO] Starting llama-server"
echo "  bin:      $LLAMA_BIN"
echo "  model:    $MODEL"
echo "  port:     $PORT"
echo "  ctx:      $CTX"
echo "  batch:    $BATCH / ubatch: $UBATCH"
echo "  kv:       $CACHE_K / $CACHE_V"
echo "  parallel: $PARALLEL"
echo "  log:      $LOG_FILE"

nohup "$LLAMA_BIN" "${ARGS[@]}" > "$LOG_FILE" 2>&1 &
PID=$!                            # stop the server later with: kill "$PID"

echo "[OK] pid=$PID"

echo "[INFO] Waiting for health check..."
for i in {1..30}; do              # poll for up to ~60 seconds
  if curl -fsS "http://127.0.0.1:${PORT}/health" >/dev/null 2>&1; then
    echo "[OK] health: http://127.0.0.1:${PORT}/health"
    exit 0
  fi
  sleep 2
done

echo "[WARN] Server is still loading. Tail the logs with:"
echo "  tail -f $LOG_FILE"