# note-articles/static/scripts/vibevoice-asr/Dockerfile

# VibeVoice-ASR for DGX Spark (ARM64, Blackwell GB10, sm_121)
# Based on NVIDIA PyTorch container for CUDA 13.1 compatibility

# TARGETARCH is declared ahead of FROM, which limits its scope to FROM lines.
# NOTE(review): the FROM below does not reference it — confirm whether this
# declaration is still needed.
ARG TARGETARCH
# Single build stage, named "base", built on the NVIDIA PyTorch 25.11-py3 image.
FROM nvcr.io/nvidia/pytorch:25.11-py3 AS base

# Image metadata (both keys set by a single LABEL instruction)
LABEL maintainer="VibeVoice-ASR DGX Spark Setup" \
      description="VibeVoice-ASR optimized for DGX Spark (ARM64, CUDA 13.1)"

# Build-time only: keeps apt/debconf from prompting during `docker build`.
# Declared as ARG (not ENV) so later RUN steps in this stage still see it,
# but it is not baked into the running container's environment.
ARG DEBIAN_FRONTEND=noninteractive

# Python runtime behaviour: do not write .pyc files, do not buffer stdout/stderr
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# PyTorch CUDA settings for DGX Spark
ENV PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True \
    USE_LIBUV=0

# Build steps start out in /workspace
WORKDIR /workspace

# OS packages (alphabetical): curl, ffmpeg (needed by the demo), git (for the clone)
# The apt cache and package lists are removed in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        ffmpeg \
        git \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Best-effort flash-attn install:
#   1. skipped entirely when pip already lists the package;
#   2. otherwise installed without build isolation;
#   3. if that install fails, a warning goes to stderr instead of the failure
#      being silently discarded.
# The image build continues in every case.
RUN pip show flash-attn > /dev/null 2>&1 \
    || pip install --no-cache-dir --no-build-isolation flash-attn \
    || echo "WARNING: flash-attn could not be installed; continuing without it" >&2

# Fetch the VibeVoice sources into /workspace/VibeVoice and install that
# checkout as an editable package
RUN git clone https://github.com/microsoft/VibeVoice.git /workspace/VibeVoice \
    && pip install --no-cache-dir -e /workspace/VibeVoice

# Add the test script to /workspace, and place the patched Gradio demo
# (MKV support) under the demo script's name inside the checkout
COPY test_vibevoice.py /workspace/
COPY vibevoice_asr_gradio_demo_patched.py /workspace/VibeVoice/demo/vibevoice_asr_gradio_demo.py

# Python dependencies for real-time ASR, installed from the copied requirements file
COPY requirements-realtime.txt /workspace/
RUN pip install --no-cache-dir --requirement /workspace/requirements-realtime.txt

# Real-time ASR module and the static/ directory
COPY realtime/ /workspace/VibeVoice/realtime/
COPY static/ /workspace/VibeVoice/static/

# Startup scripts, marked executable at copy time so that no separate chmod
# layer has to store a second copy of the files. --chmod requires BuildKit.
COPY --chmod=755 run_all.sh run_realtime.sh /workspace/VibeVoice/

# Containers start in the VibeVoice checkout, so the relative script path in CMD resolves
WORKDIR /workspace/VibeVoice

# Documented ports: 7860 (Gradio) and 8000 (WebSocket)
EXPOSE 7860 8000

# Default command: launch the Gradio demo (MKV support), listening on all interfaces
CMD ["python", "demo/vibevoice_asr_gradio_demo.py", \
     "--model_path", "microsoft/VibeVoice-ASR", \
     "--host", "0.0.0.0"]