#!/usr/bin/env bash
set -euo pipefail

# install-and-run-llama-qwen35-122b.sh
# For a fresh DGX Spark environment: build llama.cpp and launch
# llama-server with Qwen3.5-122B.
#
# One-liner:
#   curl -sL https://www.techswan.online/scripts/install-and-run-llama-qwen35-122b.sh | bash
#
# All knobs below are overridable via environment variables.

REPO_DIR="${REPO_DIR:-$HOME/llama.cpp}"
BUILD_DIR="${BUILD_DIR:-build-max}"
JOBS="${JOBS:-$(nproc)}"
MODEL="${MODEL:-unsloth/Qwen3.5-122B-A10B-GGUF:Q4_K_M}"
PORT="${PORT:-8080}"
CTX="${CTX:-204800}"
BATCH="${BATCH:-512}"
UBATCH="${UBATCH:-256}"
THREADS="${THREADS:-8}"
PARALLEL="${PARALLEL:-1}"
CACHE_K="${CACHE_K:-q8_0}"
CACHE_V="${CACHE_V:-q8_0}"
SWA_FULL="${SWA_FULL:-1}"
NGL="${NGL:-999}"
LOG_FILE="${LOG_FILE:-/tmp/llama-qwen35-122b.log}"

echo "[1/5] Install build deps"
sudo apt-get update -y
sudo apt-get install -y git cmake build-essential libssl-dev curl

echo "[2/5] Clone/Update llama.cpp"
if [[ -d "$REPO_DIR/.git" ]]; then
  git -C "$REPO_DIR" pull --ff-only
else
  git clone https://github.com/ggml-org/llama.cpp.git "$REPO_DIR"
fi

echo "[3/5] Configure + Build (CUDA ON)"
cmake -S "$REPO_DIR" -B "$REPO_DIR/$BUILD_DIR" -DGGML_CUDA=ON -DLLAMA_OPENSSL=ON
cmake --build "$REPO_DIR/$BUILD_DIR" -j "$JOBS" --target llama-server llama-cli

BIN="$REPO_DIR/$BUILD_DIR/bin/llama-server"
if [[ ! -x "$BIN" ]]; then
  echo "[ERROR] build succeeded but llama-server not found: $BIN" >&2
  exit 1
fi

echo "[4/5] Write run helper"
# BUGFIX: the original here-doc introducer (`<<EOF`) and the pkill line were
# mangled, leaving `cat > file </dev/null || true` — which wrote an EMPTY
# helper script, so step 5 silently did nothing. Reconstructed below.
# The helper bakes in the install-time values as defaults (escaped \$VAR
# expands at helper runtime; unescaped $VAR expands now), so the helper is
# self-contained under its own `set -u`.
cat > "$HOME/run-llama-qwen35-122b.sh" <<EOF
#!/usr/bin/env bash
set -euo pipefail

# Runtime overrides fall back to the values captured at install time.
MODEL="\${MODEL:-$MODEL}"
PORT="\${PORT:-$PORT}"
NGL="\${NGL:-$NGL}"
CTX="\${CTX:-$CTX}"
BATCH="\${BATCH:-$BATCH}"
UBATCH="\${UBATCH:-$UBATCH}"
THREADS="\${THREADS:-$THREADS}"
PARALLEL="\${PARALLEL:-$PARALLEL}"
CACHE_K="\${CACHE_K:-$CACHE_K}"
CACHE_V="\${CACHE_V:-$CACHE_V}"
SWA_FULL="\${SWA_FULL:-$SWA_FULL}"
LOG_FILE="\${LOG_FILE:-$LOG_FILE}"

# Stop any previous server instance, then give the port a moment to free.
# NOTE(review): reconstructed from the truncated fragment "</dev/null || true".
pkill -f llama-server 2>/dev/null || true
sleep 1

ARGS=(
  -hf "\$MODEL"
  --host 0.0.0.0
  --port "\$PORT"
  -ngl "\$NGL"
  -c "\$CTX"
  -b "\$BATCH"
  -ub "\$UBATCH"
  -t "\$THREADS"
  --parallel "\$PARALLEL"
  --cache-type-k "\$CACHE_K"
  --cache-type-v "\$CACHE_V"
  --flash-attn on
)
if [[ "\$SWA_FULL" == "1" ]]; then
  ARGS+=(--swa-full)
fi

nohup "$BIN" "\${ARGS[@]}" > "\$LOG_FILE" 2>&1 &
echo "started: pid=\$! log=\$LOG_FILE"
EOF
chmod +x "$HOME/run-llama-qwen35-122b.sh"

echo "[5/5] Start server"
"$HOME/run-llama-qwen35-122b.sh"

echo "[INFO] Waiting health..."
# Poll the /health endpoint for up to ~2 minutes (40 tries x 3s).
for _ in {1..40}; do
  if curl -fsS "http://127.0.0.1:${PORT}/health" >/dev/null 2>&1; then
    echo "[OK] health ready: http://127.0.0.1:${PORT}/health"
    exit 0
  fi
  sleep 3
done
echo "[WARN] still loading model. check logs: tail -f ${LOG_FILE}"