note-articles/static/scripts/run-llama-qwen35-122b.sh
koide d8b3ac9102
All checks were successful
Deploy Docusaurus Site / deploy (push) Successful in 56s
Add one-liner DGX script to run llama-server with Qwen3.5-122B
2026-03-05 00:15:44 +00:00

96 lines
2.1 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
set -euo pipefail
# run-llama-qwen35-122b.sh
# Launch script for llama.cpp's llama-server on DGX Spark
# (OpenAI-compatible API).
#
# One-liner:
# curl -sL https://www.techswan.online/scripts/run-llama-qwen35-122b.sh | bash
#
# Env overrides (an unset or empty variable falls back to the default below):
#   LLAMA_BIN  path to the llama-server executable
#   MODEL      value passed to -hf
#   PORT       value passed to --port; also used for the health-check URL
#   CTX        value passed to -c
#   BATCH      value passed to -b
#   UBATCH     value passed to -ub
#   THREADS    value passed to -t
#   PARALLEL   value passed to --parallel
#   CACHE_K    value passed to --cache-type-k
#   CACHE_V    value passed to --cache-type-v
#   SWA_FULL   "1" appends --swa-full
#   NGL        value passed to -ngl
#   KILL_OLD   "1" runs pkill against already-running llama-server processes
#   LOG_FILE   file that receives the server's stdout and stderr

# ":=" assigns the default only when the variable is unset or empty.
: "${LLAMA_BIN:=$HOME/llama.cpp/build-max/bin/llama-server}"
: "${MODEL:=unsloth/Qwen3.5-122B-A10B-GGUF:Q4_K_M}"
: "${PORT:=8080}"
: "${CTX:=204800}"
: "${BATCH:=512}"
: "${UBATCH:=256}"
: "${THREADS:=8}"
: "${PARALLEL:=1}"
: "${CACHE_K:=q8_0}"
: "${CACHE_V:=q8_0}"
: "${SWA_FULL:=1}"
: "${NGL:=999}"
: "${KILL_OLD:=1}"
: "${LOG_FILE:=/tmp/llama-qwen35-122b.log}"
# Guard clause: stop with exit status 1 unless LLAMA_BIN passes the -x
# (executable) test. Both messages go to stderr.
[[ -x "$LLAMA_BIN" ]] || {
  echo "[ERROR] llama-server not found: $LLAMA_BIN" >&2
  echo " Build llama.cpp first (GGML_CUDA=ON)." >&2
  exit 1
}
# When KILL_OLD=1, signal llama-server processes that would collide with the
# one about to be started, then pause briefly before launching.
if [[ "$KILL_OLD" == "1" ]]; then
  # pkill -f matches each pattern against the full command line.
  #  1) any llama-server whose arguments mention this model family;
  #  2) any llama-server started with "--port <PORT>". The port is anchored
  #     to its flag and must be followed by a space or end of line, so
  #     PORT=8080 no longer matches a server on 18080 or an unrelated
  #     argument that merely contains the same digits (e.g. "-c 8080").
  old_server_patterns=(
    'llama-server.*Qwen3\.5-122B-A10B'
    "llama-server .*--port[ =]${PORT}( |\$)"
  )
  for pattern in "${old_server_patterns[@]}"; do
    # pkill exits non-zero when nothing matched; that is not an error here.
    pkill -f "$pattern" 2>/dev/null || true
  done
  sleep 1
fi
# Address handed to --host. The default stays 0.0.0.0 (the value this script
# has always used); set BIND_HOST=127.0.0.1 to keep the API off the network.
BIND_HOST="${BIND_HOST:-0.0.0.0}"

# llama-server command line, built as an array so every value is passed as
# exactly one argument regardless of its content.
ARGS=(
  -hf "$MODEL"
  --host "$BIND_HOST" --port "$PORT"
  -ngl "$NGL"
  -c "$CTX"
  -b "$BATCH"
  -ub "$UBATCH"
  -t "$THREADS"
  --parallel "$PARALLEL"
  --cache-type-k "$CACHE_K"
  --cache-type-v "$CACHE_V"
  --flash-attn on
)
# --swa-full is appended only when SWA_FULL is exactly "1".
if [[ "$SWA_FULL" == "1" ]]; then
  ARGS+=(--swa-full)
fi
# Print the effective settings to stdout, one per line, before launching.
printf '%s\n' \
  "[INFO] Starting llama-server" \
  " bin: $LLAMA_BIN" \
  " model: $MODEL" \
  " port: $PORT" \
  " ctx: $CTX" \
  " batch: $BATCH / ubatch: $UBATCH" \
  " kv: $CACHE_K / $CACHE_V" \
  " parallel:$PARALLEL" \
  " log: $LOG_FILE"
# Start llama-server in the background under nohup, sending its stdout and
# stderr to LOG_FILE, and remember its PID.
nohup "$LLAMA_BIN" "${ARGS[@]}" > "$LOG_FILE" 2>&1 &
PID=$!
echo "[OK] pid=$PID"
echo "[INFO] Waiting health..."
# Poll the /health endpoint up to 30 times, 2 seconds apart.
for _ in {1..30}; do
  # Check liveness first: if our child already exited (bad flag, port in use,
  # out of memory, ...) report it immediately instead of polling for a minute
  # and instead of accepting a health reply from some other server that
  # happens to be listening on the same port.
  if ! kill -0 "$PID" 2>/dev/null; then
    echo "[ERROR] llama-server (pid=$PID) exited during startup. Last log lines:" >&2
    tail -n 20 "$LOG_FILE" >&2 || true
    exit 1
  fi
  if curl -fsS "http://127.0.0.1:${PORT}/health" >/dev/null 2>&1; then
    echo "[OK] health: http://127.0.0.1:${PORT}/health"
    exit 0
  fi
  sleep 2
done
# Still alive but not healthy yet: leave the server running and point the
# user at the log. The script ends with status 0 in this case.
echo "[WARN] still loading. tail logs:"
echo " tail -f $LOG_FILE"