Add one-liner installer+runner for llama.cpp on fresh DGX Spark
All checks were successful
Deploy Docusaurus Site / deploy (push) Successful in 51s
All checks were successful
Deploy Docusaurus Site / deploy (push) Successful in 51s
This commit is contained in:
parent
d8b3ac9102
commit
c619b26c2f
97
static/scripts/install-and-run-llama-qwen35-122b.sh
Executable file
97
static/scripts/install-and-run-llama-qwen35-122b.sh
Executable file
@ -0,0 +1,97 @@
|
|||||||
|
#!/usr/bin/env bash
set -euo pipefail

# install-and-run-llama-qwen35-122b.sh
# For a fresh DGX Spark environment: build llama.cpp and start llama-server (Qwen3.5-122B).
#
# One-liner:
#   curl -sL https://www.techswan.online/scripts/install-and-run-llama-qwen35-122b.sh | bash

# --- Build configuration (every value may be overridden via the environment) ---
: "${REPO_DIR:=$HOME/llama.cpp}"   # checkout location for llama.cpp
: "${BUILD_DIR:=build-max}"        # cmake build dir, relative to the repo
: "${JOBS:=$(nproc)}"              # parallel build jobs

# --- Server configuration (every value may be overridden via the environment) ---
: "${MODEL:=unsloth/Qwen3.5-122B-A10B-GGUF:Q4_K_M}"   # Hugging Face model ref for -hf
: "${PORT:=8080}"                  # llama-server listen port
: "${CTX:=204800}"                 # context size (-c)
: "${BATCH:=512}"                  # logical batch size (-b)
: "${UBATCH:=256}"                 # physical batch size (-ub)
: "${THREADS:=8}"                  # CPU threads (-t)
: "${PARALLEL:=1}"                 # concurrent request slots (--parallel)
: "${CACHE_K:=q8_0}"               # KV-cache key quantization (--cache-type-k)
: "${CACHE_V:=q8_0}"               # KV-cache value quantization (--cache-type-v)
: "${SWA_FULL:=1}"                 # "1" => pass --swa-full to the server
: "${NGL:=999}"                    # GPU layers (-ngl); 999 effectively offloads all
: "${LOG_FILE:=/tmp/llama-qwen35-122b.log}"   # server log destination
echo "[1/5] Install build deps"
|
||||||
|
sudo apt-get update -y
|
||||||
|
sudo apt-get install -y git cmake build-essential libssl-dev curl
|
||||||
|
|
||||||
|
echo "[2/5] Clone/Update llama.cpp"
|
||||||
|
if [[ -d "$REPO_DIR/.git" ]]; then
|
||||||
|
git -C "$REPO_DIR" pull --ff-only
|
||||||
|
else
|
||||||
|
git clone https://github.com/ggml-org/llama.cpp.git "$REPO_DIR"
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "[3/5] Configure + Build (CUDA ON)"
|
||||||
|
cmake -S "$REPO_DIR" -B "$REPO_DIR/$BUILD_DIR" -DGGML_CUDA=ON -DLLAMA_OPENSSL=ON
|
||||||
|
cmake --build "$REPO_DIR/$BUILD_DIR" -j "$JOBS" --target llama-server llama-cli
|
||||||
|
|
||||||
|
BIN="$REPO_DIR/$BUILD_DIR/bin/llama-server"
|
||||||
|
if [[ ! -x "$BIN" ]]; then
|
||||||
|
echo "[ERROR] build succeeded but llama-server not found: $BIN" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "[4/5] Write run helper"
|
||||||
|
cat > "$HOME/run-llama-qwen35-122b.sh" <<EOF
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
MODEL="${MODEL}"
|
||||||
|
PORT="${PORT}"
|
||||||
|
CTX="${CTX}"
|
||||||
|
BATCH="${BATCH}"
|
||||||
|
UBATCH="${UBATCH}"
|
||||||
|
THREADS="${THREADS}"
|
||||||
|
PARALLEL="${PARALLEL}"
|
||||||
|
CACHE_K="${CACHE_K}"
|
||||||
|
CACHE_V="${CACHE_V}"
|
||||||
|
SWA_FULL="${SWA_FULL}"
|
||||||
|
NGL="${NGL}"
|
||||||
|
LOG_FILE="${LOG_FILE}"
|
||||||
|
pkill -f "llama-server.*Qwen3.5-122B-A10B" 2>/dev/null || true
|
||||||
|
sleep 1
|
||||||
|
ARGS=(
|
||||||
|
-hf "\$MODEL"
|
||||||
|
--host 0.0.0.0 --port "\$PORT"
|
||||||
|
-ngl "\$NGL"
|
||||||
|
-c "\$CTX" -b "\$BATCH" -ub "\$UBATCH"
|
||||||
|
-t "\$THREADS"
|
||||||
|
--parallel "\$PARALLEL"
|
||||||
|
--cache-type-k "\$CACHE_K"
|
||||||
|
--cache-type-v "\$CACHE_V"
|
||||||
|
--flash-attn on
|
||||||
|
)
|
||||||
|
if [[ "\$SWA_FULL" == "1" ]]; then
|
||||||
|
ARGS+=(--swa-full)
|
||||||
|
fi
|
||||||
|
nohup "$BIN" "\${ARGS[@]}" > "\$LOG_FILE" 2>&1 &
|
||||||
|
echo "started: pid=\$! log=\$LOG_FILE"
|
||||||
|
EOF
|
||||||
|
chmod +x "$HOME/run-llama-qwen35-122b.sh"
|
||||||
|
|
||||||
|
echo "[5/5] Start server"
|
||||||
|
"$HOME/run-llama-qwen35-122b.sh"
|
||||||
|
|
||||||
|
echo "[INFO] Waiting health..."
|
||||||
|
for i in {1..40}; do
|
||||||
|
if curl -fsS "http://127.0.0.1:${PORT}/health" >/dev/null 2>&1; then
|
||||||
|
echo "[OK] health ready: http://127.0.0.1:${PORT}/health"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
sleep 3
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "[WARN] still loading model. check logs: tail -f ${LOG_FILE}"
|
||||||
Loading…
x
Reference in New Issue
Block a user