services:
  # Whisper.cpp STT service
  whisper:
    image: ghcr.io/ggml-org/whisper.cpp:main-cuda
    container_name: whisper-stt
    ports:
      - "8081:8081"
    volumes:
      # Mount the whole models directory (not the single .bin file): if the
      # file is missing on the host, a file bind mount would make Docker
      # create a *directory* at that path and the download fallback below
      # would fail writing to it.
      - ./whisper.cpp/models:/app/models
    working_dir: /app
    entrypoint: ""
    # Exec-form command with a literal block scalar (|) so each shell
    # statement keeps its own line. The original folded scalar (>) collapsed
    # everything onto one line with no separators (echo swallowed the curl
    # invocation, `fi` became a syntax error), and the nested double-quoted
    # URL terminated the `sh -c "..."` string early.
    command:
      - sh
      - -c
      - |
        if [ ! -f /app/models/ggml-large-v3-turbo-q5_0.bin ]; then
          echo 'Downloading ggml-large-v3-turbo-q5_0 model...'
          curl -L -o /app/models/ggml-large-v3-turbo-q5_0.bin \
            'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo-q5_0.bin?download=true'
        fi
        exec ./build/bin/whisper-server -m /app/models/ggml-large-v3-turbo-q5_0.bin -t 4 -p 1 --port 8081 --host 0.0.0.0
    environment:
      - WHISPER_LOG_LEVEL=3
    # GPU reservation (requires the NVIDIA container toolkit on the host).
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    # Restart policy in case the service fails
    restart: unless-stopped

  # Kokoro-FastAPI TTS service
  kokoro-tts:
    # CPU-only alternative image:
    # image: ghcr.io/remsky/kokoro-fastapi-cpu:latest
    image: ghcr.io/remsky/kokoro-fastapi-gpu:latest
    container_name: kokoro-tts
    ports:
      - "8880:8880"
    environment:
      - API_LOG_LEVEL=INFO
    # GPU reservation (requires the NVIDIA container toolkit on the host).
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped

# NOTE(review): these named volumes are declared but not referenced by any
# service above (the whisper service uses a host bind mount) — confirm they
# are still needed before removing.
volumes:
  models:
    driver: local
  whisper_models:
    driver: local