Enha: whisper binary build with cuda

This commit is contained in:
Grail Finder
2025-12-07 12:51:45 +03:00
parent 5bbb134aca
commit 4d18d6e730
3 changed files with 46 additions and 15 deletions

View File

@@ -21,7 +21,7 @@ build-whisper: ## Build whisper.cpp from source in batteries directory
echo "Cloning whisper.cpp repository to batteries directory..."; \
git clone https://github.com/ggml-org/whisper.cpp.git batteries/whisper.cpp; \
fi
cd batteries/whisper.cpp && make build
# Configure, then compile. Chained with '&&' so a failed CUDA/SDL2 configure aborts the recipe instead of falling through to the build step.
cd batteries/whisper.cpp && cmake -B build -DGGML_CUDA=ON -DWHISPER_SDL2=ON && cmake --build build --config Release -j 8
@echo "Whisper binary built successfully!"
download-whisper-model: ## Download Whisper model for STT in batteries directory
@@ -34,22 +34,46 @@ download-whisper-model: ## Download Whisper model for STT in batteries directory
@echo "Whisper model downloaded successfully!"
# Docker targets for STT/TTS services (in batteries directory)
docker-up: ## Start Docker Compose services for STT and TTS from batteries directory
# Bring up every service defined in batteries/docker-compose.yml in detached mode.
docker-up: ## Start all Docker Compose services for STT and TTS from batteries directory
	@printf '%s\n' \
		"Starting Docker services for STT (whisper) and TTS (kokoro)..." \
		"Note: The Whisper model will be downloaded automatically inside the container on first run"
	docker-compose --file batteries/docker-compose.yml up -d
	@printf '%s\n' "Docker services started. STT available at http://localhost:8081, TTS available at http://localhost:8880"
docker-down: ## Stop Docker Compose services from batteries directory
# Starts just the `whisper` service from batteries/docker-compose.yml in detached mode.
# Declared phony so a file named docker-up-whisper can never make the target look up to date.
.PHONY: docker-up-whisper
docker-up-whisper: ## Start only the Whisper STT service
	@echo "Starting Whisper STT service only..."
	@echo "Note: The Whisper model will be downloaded automatically inside the container on first run"
	docker-compose -f batteries/docker-compose.yml up -d whisper
	@echo "Whisper STT service started. Available at http://localhost:8081"
# Starts just the `kokoro-tts` service from batteries/docker-compose.yml in detached mode.
# Declared phony so a file named docker-up-kokoro can never make the target look up to date.
.PHONY: docker-up-kokoro
docker-up-kokoro: ## Start only the Kokoro TTS service
	@echo "Starting Kokoro TTS service only..."
	docker-compose -f batteries/docker-compose.yml up -d kokoro-tts
	@echo "Kokoro TTS service started. Available at http://localhost:8880"
# Run `down` against the whole Compose project described in batteries/docker-compose.yml.
docker-down: ## Stop all Docker Compose services from batteries directory
	@printf '%s\n' "Stopping Docker services..."
	docker-compose --file batteries/docker-compose.yml down
	@printf '%s\n' "Docker services stopped"
docker-logs: ## View logs from Docker services in batteries directory
# Stops and removes only the `whisper` service container.
# Uses `rm -s -f` instead of `down whisper`: `down` accepts service names only in
# recent Compose v2 releases, while `rm --stop --force` works with v1 and v2 alike.
# Declared phony so a file named docker-down-whisper can never mask the target.
.PHONY: docker-down-whisper
docker-down-whisper: ## Stop only the Whisper STT service
	@echo "Stopping Whisper STT service..."
	docker-compose -f batteries/docker-compose.yml rm -s -f whisper
	@echo "Whisper STT service stopped"
# Stops and removes only the `kokoro-tts` service container.
# Uses `rm -s -f` instead of `down kokoro-tts`: `down` accepts service names only in
# recent Compose v2 releases, while `rm --stop --force` works with v1 and v2 alike.
# Declared phony so a file named docker-down-kokoro can never mask the target.
.PHONY: docker-down-kokoro
docker-down-kokoro: ## Stop only the Kokoro TTS service
	@echo "Stopping Kokoro TTS service..."
	docker-compose -f batteries/docker-compose.yml rm -s -f kokoro-tts
	@echo "Kokoro TTS service stopped"
# Follow the combined log output of every service in batteries/docker-compose.yml.
docker-logs: ## View logs from all Docker services in batteries directory
	@printf '%s\n' "Displaying logs from Docker services..."
	docker-compose --file batteries/docker-compose.yml logs --follow
# Aggregate target: depends on setup-whisper and docker-up, then prints a summary.
setup-complete: setup-whisper docker-up
	@printf '%s\n' \
		"Complete setup finished! STT and TTS services are running." \
		"Note: Docker services will download the Whisper model automatically if not present."
# Follows the log output of the `whisper` service only.
# Declared phony so a file named docker-logs-whisper can never make the target look up to date.
.PHONY: docker-logs-whisper
docker-logs-whisper: ## View logs from Whisper STT service only
	@echo "Displaying logs from Whisper STT service..."
	docker-compose -f batteries/docker-compose.yml logs -f whisper
# Follows the log output of the `kokoro-tts` service only.
# Declared phony so a file named docker-logs-kokoro can never make the target look up to date.
.PHONY: docker-logs-kokoro
docker-logs-kokoro: ## View logs from Kokoro TTS service only
	@echo "Displaying logs from Kokoro TTS service..."
	docker-compose -f batteries/docker-compose.yml logs -f kokoro-tts

View File

@@ -69,7 +69,14 @@ set values as you need them to be.
For speech-to-text (STT) and text-to-speech (TTS) functionality:
1. The project uses Whisper.cpp for STT and Kokoro for TTS
2. Docker Compose automatically downloads the required Whisper model on first run
3. To start the services: `make docker-up`
4. To stop the services: `make docker-down`
5. The STT service runs on http://localhost:8081
6. The TTS service runs on http://localhost:8880
3. To start all services: `make docker-up`
4. To start only the STT service: `make docker-up-whisper`
5. To start only the TTS service: `make docker-up-kokoro`
6. To stop all services: `make docker-down`
7. To stop only the STT service: `make docker-down-whisper`
8. To stop only the TTS service: `make docker-down-kokoro`
9. To view logs from all services: `make docker-logs`
10. To view only the STT service logs: `make docker-logs-whisper`
11. To view only the TTS service logs: `make docker-logs-kokoro`
12. The STT service runs on http://localhost:8081
13. The TTS service runs on http://localhost:8880

View File

@@ -26,13 +26,13 @@ RAGDir = "ragimport"
# extra tts
TTS_ENABLED = false
TTS_URL = "http://localhost:8880/v1/audio/speech"
TTS_SPEED = 1.0
TTS_SPEED = 1.2
# extra stt
STT_ENABLED = false
STT_TYPE = "WHISPER_SERVER" # WHISPER_SERVER or WHISPER_BINARY
STT_URL = "http://localhost:8081/inference"
WhisperBinaryPath = "./batteries/whisper.cpp/whisper-cli" # Path to whisper binary (for WHISPER_BINARY mode)
WhisperModelPath = "./batteries/whisper.cpp/ggml-model.bin" # Path to whisper model file (for WHISPER_BINARY mode)
WhisperBinaryPath = "./batteries/whisper.cpp/build/bin/whisper-cli" # Path to whisper binary (for WHISPER_BINARY mode)
WhisperModelPath = "./batteries/whisper.cpp/ggml-large-v3-turbo.bin" # Path to whisper model file (for WHISPER_BINARY mode)
STT_LANG = "en" # Language for speech recognition (for WHISPER_BINARY mode)
STT_SR = 16000 # Sample rate for audio recording
DBPATH = "gflt.db"