# docker-compose.yaml — local speech stack: Whisper.cpp (STT) + Kokoro-FastAPI (TTS)
services:
  # Whisper.cpp STT service (CUDA build).
  whisper:
    image: ghcr.io/ggml-org/whisper.cpp:main-cuda
    container_name: whisper-stt
    ports:
      - "8081:8081"
    volumes:
      # Bind-mount the model file so a downloaded model persists on the host
      # and is reused across container recreations.
      - ./whisper.cpp/models/ggml-large-v3-turbo-q5_0.bin:/app/models/ggml-large-v3-turbo-q5_0.bin
    working_dir: /app
    # Clear the image's default entrypoint so `command` below runs verbatim.
    entrypoint: ""
    # List-form command with a literal block scalar:
    #  - avoids the nested double-quote bug of `sh -c "... "url" ..."`
    #    (inner quotes would terminate the outer string), and
    #  - preserves newlines between shell statements, which a folded (`>`)
    #    scalar would collapse into one line, merging `echo` and `curl`
    #    into a single broken command.
    command:
      - sh
      - -c
      - |
        set -e
        if [ ! -f /app/models/ggml-large-v3-turbo-q5_0.bin ]; then
          echo 'Downloading ggml-large-v3-turbo-q5_0 model...'
          curl -o /app/models/ggml-large-v3-turbo-q5_0.bin -L 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo-q5_0.bin?download=true'
        fi
        exec ./build/bin/whisper-server -m /app/models/ggml-large-v3-turbo-q5_0.bin -t 4 -p 1 --port 8081 --host 0.0.0.0
    environment:
      - WHISPER_LOG_LEVEL=3
    # GPU access via the NVIDIA container runtime
    # (requires nvidia-container-toolkit on the host).
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    # Restart policy in case the service fails.
    restart: unless-stopped

  # Kokoro-FastAPI TTS service.
  kokoro-tts:
    # CPU-only alternative:
    # image: ghcr.io/remsky/kokoro-fastapi-cpu:latest
    image: ghcr.io/remsky/kokoro-fastapi-gpu:latest
    container_name: kokoro-tts
    ports:
      - "8880:8880"
    environment:
      - API_LOG_LEVEL=INFO
    # GPU access via the NVIDIA container runtime
    # (requires nvidia-container-toolkit on the host).
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped

# NOTE(review): these named volumes are declared but not referenced by any
# service above (whisper uses a host bind mount). Kept for backward
# compatibility with existing setups — confirm before removing.
volumes:
  models:
    driver: local
  whisper_models:
    driver: local