koide 1fb76254e9
All checks were successful
Deploy Docusaurus Site / deploy (push) Successful in 27s
Add: VibeVoice ASR セットアップスクリプト一式
2026-02-24 01:21:33 +00:00

62 lines
2.1 KiB
Docker

# VibeVoice-ASR image for DGX Spark (ARM64, Blackwell GB10, sm_121).
# Built on top of the NVIDIA PyTorch container for CUDA 13.1 compatibility.

# Automatic platform argument. It is declared ahead of FROM, so it is only in
# scope for FROM lines; no instruction visible in this file references it.
ARG TARGETARCH
FROM nvcr.io/nvidia/pytorch:25.11-py3 AS base

# Image metadata.
LABEL maintainer="VibeVoice-ASR DGX Spark Setup" \
      description="VibeVoice-ASR optimized for DGX Spark (ARM64, CUDA 13.1)"
# Build-time only: keeps apt/debconf non-interactive for the RUN steps of this
# stage without persisting the setting into the environment of running
# containers (an ARG is visible to later RUN instructions but is not stored in
# the image's runtime environment).
ARG DEBIAN_FRONTEND=noninteractive

# Python runtime behaviour: do not write .pyc files, do not buffer stdout/stderr
# (so logs show up immediately in `docker logs`).
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# PyTorch settings for DGX Spark.
# - PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True selects the expandable
#   segments mode of the PyTorch CUDA caching allocator.
# - USE_LIBUV=0: NOTE(review) presumably turns off the libuv backend used by
#   torch.distributed's TCP store -- confirm this is still required.
ENV PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True \
    USE_LIBUV=0

# All following relative paths and RUN steps start from /workspace.
WORKDIR /workspace
# System packages: ffmpeg (needed by the demo) plus git and curl.
# Index refresh, install and cleanup all happen in this single layer so that no
# apt cache or package lists remain in the image. `set -e` aborts the step as
# soon as any command fails.
RUN set -e; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        curl \
        ffmpeg \
        git; \
    apt-get clean; \
    rm -rf /var/lib/apt/lists/*
# Make sure flash-attn is available (best-effort).
# 1. If pip already knows the package (e.g. provided by the base image), do
#    nothing.
# 2. Otherwise try to install it; --no-build-isolation lets the build use the
#    packages already installed in the image instead of an isolated build env.
# 3. A failed install must not break the image build, but it is reported on
#    stderr instead of being silently discarded.
RUN pip show flash-attn >/dev/null 2>&1 \
    || pip install --no-cache-dir flash-attn --no-build-isolation \
    || echo "WARNING: flash-attn could not be installed; continuing without it" >&2
# Clone and install VibeVoice (editable install from /workspace/VibeVoice).
#
# Build arguments:
#   VIBEVOICE_REPO  Git URL to clone (default: the upstream Microsoft repo).
#   VIBEVOICE_REF   Optional commit, tag or branch to check out after cloning,
#                   e.g. --build-arg VIBEVOICE_REF=<sha> for a reproducible
#                   build. Empty (the default) keeps the tip of the repository's
#                   default branch, which is what this step did before the
#                   arguments existed.
ARG VIBEVOICE_REPO=https://github.com/microsoft/VibeVoice.git
ARG VIBEVOICE_REF=""
RUN git clone "${VIBEVOICE_REPO}" /workspace/VibeVoice && \
    if [ -n "${VIBEVOICE_REF}" ]; then \
        git -C /workspace/VibeVoice checkout "${VIBEVOICE_REF}"; \
    fi && \
    pip install --no-cache-dir -e /workspace/VibeVoice
# Install real-time ASR dependencies.
# The requirements file is copied and installed before the scripts below so
# that editing those scripts does not invalidate the cached dependency layer.
COPY requirements-realtime.txt /workspace/requirements-realtime.txt
RUN pip install --no-cache-dir -r /workspace/requirements-realtime.txt

# Add the test script, and place the patched Gradio demo (MKV support) at the
# demo path inside the cloned repository, replacing any file already there.
COPY test_vibevoice.py /workspace/test_vibevoice.py
COPY vibevoice_asr_gradio_demo_patched.py /workspace/VibeVoice/demo/vibevoice_asr_gradio_demo.py
# Copy the real-time ASR module and the static/ directory into the repository
# checkout.
COPY realtime/ /workspace/VibeVoice/realtime/
COPY static/ /workspace/VibeVoice/static/

# Copy the startup scripts and mark them executable in the same layer.
# Using --chmod (requires BuildKit) avoids a separate `RUN chmod` layer that
# would store a second copy of both files.
COPY --chmod=755 run_all.sh run_realtime.sh /workspace/VibeVoice/
# Containers start inside the VibeVoice checkout, so the relative script path
# in CMD resolves against /workspace/VibeVoice.
WORKDIR /workspace/VibeVoice

# Documented ports: 7860 for the Gradio UI, 8000 for the WebSocket endpoint.
EXPOSE 7860 8000

# Default process: the Gradio demo (patched for MKV support), listening on all
# interfaces. Exec form, so python runs directly without a wrapping shell.
CMD ["python", "demo/vibevoice_asr_gradio_demo.py", \
     "--model_path", "microsoft/VibeVoice-ASR", \
     "--host", "0.0.0.0"]