#!/usr/bin/env bash
set -euo pipefail

# install-and-run-llama-qwen35-122b.sh
# (originally delivered as a git patch adding
#  static/scripts/install-and-run-llama-qwen35-122b.sh, mode 100755)
#
# For a fresh DGX Spark environment: build llama.cpp and start llama-server
# with Qwen3.5-122B.
#
# One-liner:
#   curl -sL https://www.techswan.online/scripts/install-and-run-llama-qwen35-122b.sh | bash
#
# All parameters below are overridable via environment variables.

REPO_DIR="${REPO_DIR:-$HOME/llama.cpp}"
BUILD_DIR="${BUILD_DIR:-build-max}"
JOBS="${JOBS:-$(nproc)}"

MODEL="${MODEL:-unsloth/Qwen3.5-122B-A10B-GGUF:Q4_K_M}"
PORT="${PORT:-8080}"
CTX="${CTX:-204800}"
BATCH="${BATCH:-512}"
UBATCH="${UBATCH:-256}"
THREADS="${THREADS:-8}"
PARALLEL="${PARALLEL:-1}"
CACHE_K="${CACHE_K:-q8_0}"
CACHE_V="${CACHE_V:-q8_0}"
SWA_FULL="${SWA_FULL:-1}"
NGL="${NGL:-999}"
LOG_FILE="${LOG_FILE:-/tmp/llama-qwen35-122b.log}"

echo "[1/5] Install build deps"
sudo apt-get update -y
sudo apt-get install -y git cmake build-essential libssl-dev curl

echo "[2/5] Clone/Update llama.cpp"
if [[ -d "$REPO_DIR/.git" ]]; then
  git -C "$REPO_DIR" pull --ff-only
else
  git clone https://github.com/ggml-org/llama.cpp.git "$REPO_DIR"
fi

echo "[3/5] Configure + Build (CUDA ON)"
cmake -S "$REPO_DIR" -B "$REPO_DIR/$BUILD_DIR" -DGGML_CUDA=ON -DLLAMA_OPENSSL=ON
cmake --build "$REPO_DIR/$BUILD_DIR" -j "$JOBS" --target llama-server llama-cli

BIN="$REPO_DIR/$BUILD_DIR/bin/llama-server"
if [[ ! -x "$BIN" ]]; then
  echo "[ERROR] build succeeded but llama-server not found: $BIN" >&2
  exit 1
fi

echo "[4/5] Write run helper"
# NOTE(review): the heredoc below was reconstructed — the source patch was
# garbled between the `cat > … <` redirect and `…/dev/null || true`. The
# escaped `\$VAR` references in the surviving body imply the helper defines
# these variables itself at run time; install-time values are baked in here
# as env-overridable defaults. Unescaped `$BIN` is expanded at write time
# (matching the original), pinning the helper to this build's binary.
cat > "$HOME/run-llama-qwen35-122b.sh" <<EOF
#!/usr/bin/env bash
set -euo pipefail

# Launch parameters: install-time defaults, overridable via environment.
MODEL="\${MODEL:-$MODEL}"
PORT="\${PORT:-$PORT}"
CTX="\${CTX:-$CTX}"
BATCH="\${BATCH:-$BATCH}"
UBATCH="\${UBATCH:-$UBATCH}"
THREADS="\${THREADS:-$THREADS}"
PARALLEL="\${PARALLEL:-$PARALLEL}"
CACHE_K="\${CACHE_K:-$CACHE_K}"
CACHE_V="\${CACHE_V:-$CACHE_V}"
SWA_FULL="\${SWA_FULL:-$SWA_FULL}"
NGL="\${NGL:-$NGL}"
LOG_FILE="\${LOG_FILE:-$LOG_FILE}"

# Best-effort: stop a previously running server before starting a new one.
pkill -f llama-server 2>/dev/null || true
sleep 1
ARGS=(
  -hf "\$MODEL"
  --host 0.0.0.0 --port "\$PORT"
  -ngl "\$NGL"
  -c "\$CTX" -b "\$BATCH" -ub "\$UBATCH"
  -t "\$THREADS"
  --parallel "\$PARALLEL"
  --cache-type-k "\$CACHE_K"
  --cache-type-v "\$CACHE_V"
  --flash-attn on
)
if [[ "\$SWA_FULL" == "1" ]]; then
  ARGS+=(--swa-full)
fi
nohup "$BIN" "\${ARGS[@]}" > "\$LOG_FILE" 2>&1 &
echo "started: pid=\$! log=\$LOG_FILE"
EOF
chmod +x "$HOME/run-llama-qwen35-122b.sh"

echo "[5/5] Start server"
"$HOME/run-llama-qwen35-122b.sh"

# Poll the health endpoint for up to ~2 minutes (40 tries x 3 s); model
# loading for a 122B GGUF can legitimately take longer, hence the soft WARN.
echo "[INFO] Waiting health..."
for _ in {1..40}; do
  if curl -fsS "http://127.0.0.1:${PORT}/health" >/dev/null 2>&1; then
    echo "[OK] health ready: http://127.0.0.1:${PORT}/health"
    exit 0
  fi
  sleep 3
done

echo "[WARN] still loading model. check logs: tail -f ${LOG_FILE}"