All checks were successful
Deploy Docusaurus Site / deploy (push) Successful in 56s
96 lines
2.1 KiB
Bash
Executable File
96 lines
2.1 KiB
Bash
Executable File
#!/usr/bin/env bash
set -euo pipefail

# run-llama-qwen35-122b.sh
# Launch script for llama.cpp's llama-server on DGX Spark (OpenAI-compatible API).
#
# One-liner:
#   curl -sL https://www.techswan.online/scripts/run-llama-qwen35-122b.sh | bash
#
# Env overrides (each falls back to the default shown when unset or empty):
#   LLAMA_BIN="$HOME/llama.cpp/build-max/bin/llama-server"   server binary to run
#   MODEL="unsloth/Qwen3.5-122B-A10B-GGUF:Q4_K_M"             passed to -hf
#   PORT=8080                                                 passed to --port
#   CTX=204800                                                passed to -c
#   BATCH=512                                                 passed to -b
#   UBATCH=256                                                passed to -ub
#   THREADS=8                                                 passed to -t
#   PARALLEL=1                                                passed to --parallel
#   CACHE_K=q8_0                                              passed to --cache-type-k
#   CACHE_V=q8_0                                              passed to --cache-type-v
#   SWA_FULL=1                                                "1" adds --swa-full
#   NGL=999                                                   passed to -ngl
#   KILL_OLD=1                                                "1" stops old servers first
#   LOG_FILE=/tmp/llama-qwen35-122b.log                       server stdout/stderr

LLAMA_BIN="${LLAMA_BIN:-$HOME/llama.cpp/build-max/bin/llama-server}"
MODEL="${MODEL:-unsloth/Qwen3.5-122B-A10B-GGUF:Q4_K_M}"
PORT="${PORT:-8080}"
CTX="${CTX:-204800}"
BATCH="${BATCH:-512}"
UBATCH="${UBATCH:-256}"
THREADS="${THREADS:-8}"
PARALLEL="${PARALLEL:-1}"
CACHE_K="${CACHE_K:-q8_0}"
CACHE_V="${CACHE_V:-q8_0}"
SWA_FULL="${SWA_FULL:-1}"
NGL="${NGL:-999}"
KILL_OLD="${KILL_OLD:-1}"
LOG_FILE="${LOG_FILE:-/tmp/llama-qwen35-122b.log}"

# PORT is later interpolated into a process-matching regex and a URL, so
# reject anything that is not a plain decimal number up front.
if [[ ! "$PORT" =~ ^[0-9]+$ ]]; then
  echo "[ERROR] PORT must be numeric: $PORT" >&2
  exit 1
fi

# Abort early when the configured llama-server binary does not exist or is
# not executable; nothing below can work without it.
if [[ ! -x "$LLAMA_BIN" ]]; then
  echo "[ERROR] llama-server not found: $LLAMA_BIN" >&2
  echo " Build llama.cpp first (GGML_CUDA=ON)." >&2
  exit 1
fi

# When KILL_OLD=1, stop previously started servers before launching a new one:
# any llama-server running this model family, and any llama-server that was
# started with --port set to exactly $PORT.
if [[ "$KILL_OLD" == "1" ]]; then
  old_server_patterns=(
    "llama-server.*Qwen3\.5-122B-A10B"
    # Anchor on the --port argument so that e.g. PORT=8080 does not also
    # match "--port 18080" or an unrelated numeric argument.
    "llama-server .*--port[ =]${PORT}( |\$)"
  )

  # Best effort: pkill exits non-zero when nothing matched, which is not an
  # error here.
  for pattern in "${old_server_patterns[@]}"; do
    pkill -f "$pattern" 2>/dev/null || true
  done

  # Wait (up to ~10 s) until the matched processes are really gone instead of
  # sleeping for a fixed second.
  for _ in {1..20}; do
    still_running=0
    for pattern in "${old_server_patterns[@]}"; do
      if pgrep -f "$pattern" >/dev/null 2>&1; then
        still_running=1
      fi
    done
    if [[ "$still_running" == "0" ]]; then
      break
    fi
    sleep 0.5
  done
fi

# Command-line arguments for llama-server. They are kept in an array so that
# every value is passed as exactly one word, whatever characters it contains.
ARGS=(
  -hf "$MODEL"
  --host 0.0.0.0 --port "$PORT"
  -ngl "$NGL"
  -c "$CTX"
  -b "$BATCH"
  -ub "$UBATCH"
  -t "$THREADS"
  --parallel "$PARALLEL"
  --cache-type-k "$CACHE_K"
  --cache-type-v "$CACHE_V"
  --flash-attn on
)

# --swa-full is a bare switch, so it is appended only when enabled.
if [[ "$SWA_FULL" == "1" ]]; then
  ARGS+=(--swa-full)
fi

#######################################
# Print the effective launch settings.
# Globals:   LLAMA_BIN, MODEL, PORT, CTX, BATCH, UBATCH, CACHE_K, CACHE_V,
#            PARALLEL, LOG_FILE (all read)
# Arguments: none
# Outputs:   the summary, one setting per line, on stdout
#######################################
print_launch_summary() {
  cat <<EOF
[INFO] Starting llama-server
 bin: $LLAMA_BIN
 model: $MODEL
 port: $PORT
 ctx: $CTX
 batch: $BATCH / ubatch: $UBATCH
 kv: $CACHE_K / $CACHE_V
 parallel:$PARALLEL
 log: $LOG_FILE
EOF
}

print_launch_summary

# Make sure the log file can be created before going to the background;
# otherwise the redirection would fail inside the background job and this
# script would still report a pid as if the launch had worked.
if ! true > "$LOG_FILE"; then
  echo "[ERROR] cannot write log file: $LOG_FILE" >&2
  exit 1
fi

# Start the server detached from this shell (nohup + &), with stdout and
# stderr both captured in LOG_FILE, and remember its pid.
nohup "$LLAMA_BIN" "${ARGS[@]}" > "$LOG_FILE" 2>&1 &
PID=$!

echo "[OK] pid=$PID"

# Poll the server's /health endpoint every 2 s until it answers successfully.
# HEALTH_RETRIES (default 30, i.e. about one minute) bounds the number of
# attempts. Exit codes: 0 when healthy or when still loading after the last
# attempt, 1 when the server process died while we were waiting.
HEALTH_RETRIES="${HEALTH_RETRIES:-30}"

echo "[INFO] Waiting health..."
for ((attempt = 0; attempt < HEALTH_RETRIES; attempt++)); do
  if curl -fsS "http://127.0.0.1:${PORT}/health" >/dev/null 2>&1; then
    echo "[OK] health: http://127.0.0.1:${PORT}/health"
    exit 0
  fi

  # A server that has already exited will never become healthy; report it
  # right away instead of polling until the retries run out.
  if ! kill -0 "$PID" 2>/dev/null; then
    echo "[ERROR] llama-server (pid=$PID) exited during startup. Last log lines:" >&2
    tail -n 20 "$LOG_FILE" >&2 || true
    exit 1
  fi

  sleep 2
done

echo "[WARN] still loading. tail logs:"
echo " tail -f $LOG_FILE"