Add one-liner DGX script to run llama-server with Qwen3.5-122B
All checks were successful
Deploy Docusaurus Site / deploy (push) Successful in 56s

This commit is contained in:
koide 2026-03-05 00:15:44 +00:00
parent 421ace0650
commit d8b3ac9102

View File

@ -0,0 +1,95 @@
#!/usr/bin/env bash
# Strict mode: abort on the first failing command, treat unset variables as
# errors, and make a pipeline fail when any of its stages fails.
set -o errexit -o nounset -o pipefail
# run-llama-qwen35-122b.sh
# Launch script for llama.cpp's llama-server on DGX Spark (OpenAI-compatible API)
#
# One-liner:
# curl -sL https://www.techswan.online/scripts/run-llama-qwen35-122b.sh | bash
#
# Env overrides (defaults shown; LLAMA_BIN and LOG_FILE can also be overridden):
# MODEL="unsloth/Qwen3.5-122B-A10B-GGUF:Q4_K_M"
# PORT=8080
# CTX=204800
# BATCH=512
# UBATCH=256
# THREADS=8
# PARALLEL=1
# CACHE_K=q8_0
# CACHE_V=q8_0
# SWA_FULL=1
# NGL=999
# KILL_OLD=1
# Configuration: every setting keeps the caller's environment value when it
# is set and non-empty, and otherwise falls back to the default given here.

# Path to the llama-server executable and where its output is logged.
: "${LLAMA_BIN:=$HOME/llama.cpp/build-max/bin/llama-server}"
: "${LOG_FILE:=/tmp/llama-qwen35-122b.log}"

# Model reference (passed to -hf) and listening port.
: "${MODEL:=unsloth/Qwen3.5-122B-A10B-GGUF:Q4_K_M}"
: "${PORT:=8080}"

# Values forwarded to llama-server's -c / -b / -ub / -t / --parallel / -ngl.
: "${CTX:=204800}"
: "${BATCH:=512}"
: "${UBATCH:=256}"
: "${THREADS:=8}"
: "${PARALLEL:=1}"
: "${NGL:=999}"

# Values forwarded to --cache-type-k / --cache-type-v.
: "${CACHE_K:=q8_0}"
: "${CACHE_V:=q8_0}"

# Switches: "1" enables --swa-full / killing previously started servers.
: "${SWA_FULL:=1}"
: "${KILL_OLD:=1}"
# Abort early (exit status 1) when the configured llama-server path is not an
# executable file; both diagnostic lines go to stderr.
[[ -x "$LLAMA_BIN" ]] || {
  printf '%s\n' \
    "[ERROR] llama-server not found: $LLAMA_BIN" \
    " Build llama.cpp first (GGML_CUDA=ON)." >&2
  exit 1
}
# When KILL_OLD is "1", stop llama-server processes left over from an earlier
# run before starting a new one. pkill exits non-zero when nothing matched,
# which is expected here, hence the `|| true` under `set -e`.
if [[ "$KILL_OLD" == "1" ]]; then
  # Servers running this model. The dot is escaped so it only matches a
  # literal "." in the model name.
  pkill -f "llama-server.*Qwen3\.5-122B-A10B" 2>/dev/null || true
  # Servers listening on the same port. The pattern is anchored on the --port
  # option and on the end of the number: a bare "${PORT}" would also match
  # e.g. port 18080 or a context size containing the same digits and kill
  # unrelated servers.
  pkill -f "llama-server .*--port[= ]${PORT}( |\$)" 2>/dev/null || true
  # Brief pause after signalling the old processes.
  sleep 1
fi
# Assemble the llama-server command line as an array so that every quoted
# value is passed as exactly one argument.
ARGS=(-hf "$MODEL")
ARGS+=(--host 0.0.0.0 --port "$PORT")
ARGS+=(-ngl "$NGL" -c "$CTX")
ARGS+=(-b "$BATCH" -ub "$UBATCH")
ARGS+=(-t "$THREADS" --parallel "$PARALLEL")
ARGS+=(--cache-type-k "$CACHE_K" --cache-type-v "$CACHE_V")
ARGS+=(--flash-attn on)
# --swa-full is appended only when SWA_FULL is exactly "1".
case "$SWA_FULL" in
  1) ARGS+=(--swa-full) ;;
esac
# Print a summary of the effective settings, one per line, before launching.
printf '%s\n' \
  "[INFO] Starting llama-server" \
  " bin: $LLAMA_BIN" \
  " model: $MODEL" \
  " port: $PORT" \
  " ctx: $CTX" \
  " batch: $BATCH / ubatch: $UBATCH" \
  " kv: $CACHE_K / $CACHE_V" \
  " parallel:$PARALLEL" \
  " log: $LOG_FILE"
# Start llama-server in the background under nohup, sending both stdout and
# stderr to the log file, and remember its process id for later reporting.
nohup "$LLAMA_BIN" "${ARGS[@]}" >"$LOG_FILE" 2>&1 &
PID="$!"
printf '%s\n' "[OK] pid=$PID"
# Poll the server's /health endpoint on localhost, up to 30 times with a
# 2-second pause between attempts.
#   exit 0 - the endpoint answered successfully
#   exit 1 - the server process disappeared before it became healthy
#   falls through with a warning when the server is still running but has
#   not answered within the polling window.
echo "[INFO] Waiting health..."
for _ in {1..30}; do
  if curl -fsS "http://127.0.0.1:${PORT}/health" >/dev/null 2>&1; then
    echo "[OK] health: http://127.0.0.1:${PORT}/health"
    exit 0
  fi
  # Signal 0 only tests whether the process still exists. Without this check
  # a server that crashed on startup would be polled for the full window and
  # then reported as "still loading" with a success exit status.
  if ! kill -0 "$PID" 2>/dev/null; then
    echo "[ERROR] llama-server (pid=$PID) exited before becoming healthy." >&2
    echo " See log: $LOG_FILE" >&2
    exit 1
  fi
  sleep 2
done
echo "[WARN] still loading. tail logs:"
echo " tail -f $LOG_FILE"