Enha: whisper binary build with cuda

This commit is contained in:
Grail Finder
2025-12-07 12:51:45 +03:00
parent 5bbb134aca
commit 4d18d6e730
3 changed files with 46 additions and 15 deletions

View File

@@ -21,7 +21,7 @@ build-whisper: ## Build whisper.cpp from source in batteries directory
echo "Cloning whisper.cpp repository to batteries directory..."; \ echo "Cloning whisper.cpp repository to batteries directory..."; \
git clone https://github.com/ggml-org/whisper.cpp.git batteries/whisper.cpp; \ git clone https://github.com/ggml-org/whisper.cpp.git batteries/whisper.cpp; \
fi fi
cd batteries/whisper.cpp && make build cd batteries/whisper.cpp && cmake -B build -DGGML_CUDA=ON -DWHISPER_SDL2=ON; cmake --build build --config Release -j 8
@echo "Whisper binary built successfully!" @echo "Whisper binary built successfully!"
download-whisper-model: ## Download Whisper model for STT in batteries directory download-whisper-model: ## Download Whisper model for STT in batteries directory
@@ -34,22 +34,46 @@ download-whisper-model: ## Download Whisper model for STT in batteries directory
@echo "Whisper model downloaded successfully!" @echo "Whisper model downloaded successfully!"
# Docker targets for STT/TTS services (in batteries directory) # Docker targets for STT/TTS services (in batteries directory)
docker-up: ## Start Docker Compose services for STT and TTS from batteries directory docker-up: ## Start all Docker Compose services for STT and TTS from batteries directory
@echo "Starting Docker services for STT (whisper) and TTS (kokoro)..." @echo "Starting Docker services for STT (whisper) and TTS (kokoro)..."
@echo "Note: The Whisper model will be downloaded automatically inside the container on first run" @echo "Note: The Whisper model will be downloaded automatically inside the container on first run"
docker-compose -f batteries/docker-compose.yml up -d docker-compose -f batteries/docker-compose.yml up -d
@echo "Docker services started. STT available at http://localhost:8081, TTS available at http://localhost:8880" @echo "Docker services started. STT available at http://localhost:8081, TTS available at http://localhost:8880"
docker-down: ## Stop Docker Compose services from batteries directory docker-up-whisper: ## Start only the Whisper STT service
@echo "Starting Whisper STT service only..."
@echo "Note: The Whisper model will be downloaded automatically inside the container on first run"
docker-compose -f batteries/docker-compose.yml up -d whisper
@echo "Whisper STT service started. Available at http://localhost:8081"
# Runs `docker-compose up -d` for the kokoro-tts service only, using the
# compose file at batteries/docker-compose.yml. The status messages are
# silenced with `@`; the compose command itself is still echoed by make.
docker-up-kokoro: ## Start only the Kokoro TTS service
	@printf '%s\n' "Starting Kokoro TTS service only..."
	docker-compose -f batteries/docker-compose.yml up -d kokoro-tts
	@printf '%s\n' "Kokoro TTS service started. Available at http://localhost:8880"
docker-down: ## Stop all Docker Compose services from batteries directory
@echo "Stopping Docker services..." @echo "Stopping Docker services..."
docker-compose -f batteries/docker-compose.yml down docker-compose -f batteries/docker-compose.yml down
@echo "Docker services stopped" @echo "Docker services stopped"
docker-logs: ## View logs from Docker services in batteries directory docker-down-whisper: ## Stop only the Whisper STT service
@echo "Stopping Whisper STT service..."
docker-compose -f batteries/docker-compose.yml down whisper
@echo "Whisper STT service stopped"
# Stops and removes only the kokoro-tts container from
# batteries/docker-compose.yml.
# `rm -s -f <service>` (stop first, no confirmation prompt) is used instead of
# `down <service>`: Compose v1's `down` takes no service arguments and fails
# with a usage error, whereas `rm -s -f` is accepted by both v1 and v2.
docker-down-kokoro: ## Stop only the Kokoro TTS service
	@echo "Stopping Kokoro TTS service..."
	docker-compose -f batteries/docker-compose.yml rm -s -f kokoro-tts
	@echo "Kokoro TTS service stopped"
docker-logs: ## View logs from all Docker services in batteries directory
@echo "Displaying logs from Docker services..." @echo "Displaying logs from Docker services..."
docker-compose -f batteries/docker-compose.yml logs -f docker-compose -f batteries/docker-compose.yml logs -f
# Convenience target to setup everything docker-logs-whisper: ## View logs from Whisper STT service only
setup-complete: setup-whisper docker-up @echo "Displaying logs from Whisper STT service..."
@echo "Complete setup finished! STT and TTS services are running." docker-compose -f batteries/docker-compose.yml logs -f whisper
@echo "Note: Docker services will download the Whisper model automatically if not present."
# Follows (`logs -f`) the log output of the kokoro-tts service only, using the
# compose file at batteries/docker-compose.yml. The recipe stays attached
# until the log stream ends or the user interrupts it.
docker-logs-kokoro: ## View logs from Kokoro TTS service only
	@printf '%s\n' "Displaying logs from Kokoro TTS service..."
	docker-compose -f batteries/docker-compose.yml logs -f kokoro-tts

View File

@@ -69,7 +69,14 @@ set values as you need them to be.
For speech-to-text (STT) and text-to-speech (TTS) functionality: For speech-to-text (STT) and text-to-speech (TTS) functionality:
1. The project uses Whisper.cpp for STT and Kokoro for TTS 1. The project uses Whisper.cpp for STT and Kokoro for TTS
2. Docker Compose automatically downloads the required Whisper model on first run 2. Docker Compose automatically downloads the required Whisper model on first run
3. To start the services: `make docker-up` 3. To start all services: `make docker-up`
4. To stop the services: `make docker-down` 4. To start only STT service: `make docker-up-whisper`
5. The STT service runs on http://localhost:8081 5. To start only TTS service: `make docker-up-kokoro`
6. The TTS service runs on http://localhost:8880 6. To stop all services: `make docker-down`
7. To stop only STT service: `make docker-down-whisper`
8. To stop only TTS service: `make docker-down-kokoro`
9. To view all service logs: `make docker-logs`
10. To view only STT service logs: `make docker-logs-whisper`
11. To view only TTS service logs: `make docker-logs-kokoro`
12. The STT service runs on http://localhost:8081
13. The TTS service runs on http://localhost:8880

View File

@@ -26,13 +26,13 @@ RAGDir = "ragimport"
# extra tts # extra tts
TTS_ENABLED = false TTS_ENABLED = false
TTS_URL = "http://localhost:8880/v1/audio/speech" TTS_URL = "http://localhost:8880/v1/audio/speech"
TTS_SPEED = 1.0 TTS_SPEED = 1.2
# extra stt # extra stt
STT_ENABLED = false STT_ENABLED = false
STT_TYPE = "WHISPER_SERVER" # WHISPER_SERVER or WHISPER_BINARY STT_TYPE = "WHISPER_SERVER" # WHISPER_SERVER or WHISPER_BINARY
STT_URL = "http://localhost:8081/inference" STT_URL = "http://localhost:8081/inference"
WhisperBinaryPath = "./batteries/whisper.cpp/whisper-cli" # Path to whisper binary (for WHISPER_BINARY mode) WhisperBinaryPath = "./batteries/whisper.cpp/build/bin/whisper-cli" # Path to whisper binary (for WHISPER_BINARY mode)
WhisperModelPath = "./batteries/whisper.cpp/ggml-model.bin" # Path to whisper model file (for WHISPER_BINARY mode) WhisperModelPath = "./batteries/whisper.cpp/ggml-large-v3-turbo.bin" # Path to whisper model file (for WHISPER_BINARY mode)
STT_LANG = "en" # Language for speech recognition (for WHISPER_BINARY mode) STT_LANG = "en" # Language for speech recognition (for WHISPER_BINARY mode)
STT_SR = 16000 # Sample rate for audio recording STT_SR = 16000 # Sample rate for audio recording
DBPATH = "gflt.db" DBPATH = "gflt.db"