From d2caebdb4fd3ad148aad20866503b7d46d546404 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Fri, 6 Mar 2026 09:11:25 +0300 Subject: [PATCH] Enha (onnx): use gpu --- Makefile | 98 ++++++++++++++++++++++++++++++++++++++++++++++++- rag/embedder.go | 68 +++++++++++++++++++++++++++++++++- 2 files changed, 164 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 4314d99..78db940 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: setconfig run lint lintall install-linters setup-whisper build-whisper download-whisper-model docker-up docker-down docker-logs noextra-run installdelve checkdelve +.PHONY: setconfig run lint lintall install-linters setup-whisper build-whisper download-whisper-model docker-up docker-down docker-logs noextra-run installdelve checkdelve fetch-onnx install-onnx-deps run: setconfig go build -tags extra -o gf-lt && ./gf-lt @@ -33,6 +33,102 @@ lintall: lint fetch-onnx: mkdir -p onnx/embedgemma && curl -o onnx/embedgemma/config.json -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/config.json && curl -o onnx/embedgemma/tokenizer.json -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/tokenizer.json && curl -o onnx/embedgemma/model_q4.onnx -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/onnx/model_q4.onnx && curl -o onnx/embedgemma/model_q4.onnx_data -L https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX/resolve/main/onnx/model_q4.onnx_data?download=true +install-onnx-deps: ## Install ONNX Runtime with CUDA support (or CPU fallback) + @echo "=== ONNX Runtime Installer ===" && \ + echo "" && \ + echo "Checking for existing ONNX Runtime..." && \ + if ldconfig -p 2>/dev/null | grep -q libonnxruntime.so.1; then \ + echo "ONNX Runtime is already installed:" && \ + ldconfig -p 2>/dev/null | grep libonnxruntime && \ + echo "" && \ + echo "Skipping installation. 
To reinstall, remove existing libs first:" && \
+		echo "  sudo rm -f /usr/local/lib/libonnxruntime*.so*" && \
+		exit 0; \
+	fi && \
+	echo "No ONNX Runtime found. Proceeding with installation..." && \
+	echo "" && \
+	echo "Detecting CUDA version..." && \
+	HAS_CUDA=0 && \
+	if command -v nvidia-smi >/dev/null 2>&1; then \
+		CUDA_INFO=$$(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null | head -1) && \
+		if [ -n "$$CUDA_INFO" ]; then \
+			echo "Found NVIDIA GPU with driver: $$CUDA_INFO" && \
+			HAS_CUDA=1; \
+		else \
+			echo "NVIDIA driver found but could not detect CUDA version"; \
+		fi; \
+	else \
+		echo "No NVIDIA GPU detected (nvidia-smi not found)"; \
+	fi && \
+	echo "" && \
+	echo "Determining ONNX Runtime version..." && \
+	ARCH=$$(uname -m) && \
+	if [ "$$ARCH" = "x86_64" ]; then \
+		ONNX_ARCH="x64"; \
+	elif [ "$$ARCH" = "aarch64" ] || [ "$$ARCH" = "arm64" ]; then \
+		ONNX_ARCH="aarch64"; \
+	else \
+		echo "Unsupported architecture: $$ARCH" && \
+		exit 1; \
+	fi && \
+	echo "Detected architecture: $$ARCH (ONNX runtime: $$ONNX_ARCH)" && \
+	if [ "$$HAS_CUDA" = "1" ]; then \
+		echo "Installing ONNX Runtime with CUDA support..."; \
+		ONNX_VERSION="1.24.2"; \
+	else \
+		echo "Installing ONNX Runtime (CPU version)..."; \
+		ONNX_VERSION="1.24.2"; \
+	fi && \
+	FILENAME="onnxruntime-linux-$${ONNX_ARCH}-$${ONNX_VERSION}.tgz" && \
+	URL="https://github.com/microsoft/onnxruntime/releases/download/v$${ONNX_VERSION}/$${FILENAME}" && \
+	echo "Downloading $${URL}..." && \
+	mkdir -p /tmp/onnx-install && \
+	curl -L -o /tmp/onnx-install/$${FILENAME} "$${URL}" || { \
+		echo "Failed to download ONNX Runtime v$${ONNX_VERSION}. Trying v1.18.0..." && \
+		ONNX_VERSION="1.18.0" && \
+		FILENAME="onnxruntime-linux-$${ONNX_ARCH}-$${ONNX_VERSION}.tgz" && \
+		URL="https://github.com/microsoft/onnxruntime/releases/download/v$${ONNX_VERSION}/$${FILENAME}" && \
+		curl -L -o /tmp/onnx-install/$${FILENAME} "$${URL}" || { \
+			echo "ERROR: Failed to download ONNX Runtime from GitHub" && \
+			echo "" && \
+			echo "Please install manually:" && \
+			echo "  1. Go to https://github.com/microsoft/onnxruntime/releases" && \
+			echo "  2. Download onnxruntime-linux-$${ONNX_ARCH}-VERSION.tgz" && \
+			echo "  3. Extract and copy to /usr/local/lib:" && \
+			echo "     tar -xzf onnxruntime-linux-$${ONNX_ARCH}-VERSION.tgz" && \
+			echo "     sudo cp -r onnxruntime-linux-$${ONNX_ARCH}-VERSION/lib/* /usr/local/lib/" && \
+			echo "     sudo ldconfig" && \
+			exit 1; \
+		}; \
+	} && \
+	echo "Extracting..." && \
+	cd /tmp/onnx-install && tar -xzf $${FILENAME} && \
+	echo "Installing to /usr/local/lib..." && \
+	ONNX_DIR=$$(find /tmp/onnx-install -maxdepth 1 -type d -name "onnxruntime-linux-*") && \
+	if [ -d "$${ONNX_DIR}/lib" ]; then \
+		cp -r $${ONNX_DIR}/lib/* /usr/local/lib/ 2>/dev/null || sudo cp -r $${ONNX_DIR}/lib/* /usr/local/lib/; \
+	else \
+		echo "ERROR: Could not find lib directory in extracted archive" && \
+		exit 1; \
+	fi && \
+	echo "Updating library cache..." && \
+	sudo ldconfig 2>/dev/null || ldconfig && \
+	echo "" && \
+	echo "=== Installation complete! 
===" && \ + echo "" && \ + echo "Installed libraries:" && \ + ldconfig -p | grep libonnxruntime || echo "(libraries may require logout/relogin to appear)" && \ + echo "" && \ + if [ "$$HAS_CUDA" = "1" ]; then \ + echo "NOTE: CUDA-enabled ONNX Runtime installed."; \ + echo "Ensure you also have CUDA libraries installed:"; \ + echo " - libcudnn, libcublas, libcurand"; \ + else \ + echo "NOTE: CPU-only ONNX Runtime installed."; \ + echo "For GPU support, install CUDA and re-run this script."; \ + fi && \ + rm -rf /tmp/onnx-install + # Whisper STT Setup (in batteries directory) setup-whisper: build-whisper download-whisper-model diff --git a/rag/embedder.go b/rag/embedder.go index 59dbfd2..13f6a6e 100644 --- a/rag/embedder.go +++ b/rag/embedder.go @@ -156,14 +156,22 @@ type ONNXEmbedder struct { var onnxInitOnce sync.Once var onnxReady bool var onnxLibPath string +var cudaLibPath string var onnxLibPaths = []string{ "/usr/lib/libonnxruntime.so", + "/usr/lib/libonnxruntime.so.1.24.2", "/usr/local/lib/libonnxruntime.so", "/usr/lib/x86_64-linux-gnu/libonnxruntime.so", "/opt/onnxruntime/lib/libonnxruntime.so", } +var cudaLibPaths = []string{ + "/usr/lib/libonnxruntime_providers_cuda.so", + "/usr/local/lib/libonnxruntime_providers_cuda.so", + "/opt/onnxruntime/lib/libonnxruntime_providers_cuda.so", +} + func findONNXLibrary() string { for _, path := range onnxLibPaths { if _, err := os.Stat(path); err == nil { @@ -173,6 +181,15 @@ func findONNXLibrary() string { return "" } +func findCUDALibrary() string { + for _, path := range cudaLibPaths { + if _, err := os.Stat(path); err == nil { + return path + } + } + return "" +} + func NewONNXEmbedder(modelPath, tokenizerPath string, dims int, logger *slog.Logger) (*ONNXEmbedder, error) { // Check if model and tokenizer files exist if _, err := os.Stat(modelPath); err != nil { @@ -188,6 +205,12 @@ func NewONNXEmbedder(modelPath, tokenizerPath string, dims int, logger *slog.Log return nil, errors.New("ONNX runtime library not found 
in standard locations") } + // Find CUDA provider library (optional) + cudaLibPath = findCUDALibrary() + if cudaLibPath == "" { + fmt.Println("WARNING: CUDA provider library not found, will use CPU") + } + emb := &ONNXEmbedder{ tokenizerPath: tokenizerPath, dims: dims, @@ -223,16 +246,56 @@ func (e *ONNXEmbedder) ensureInitialized() error { onnxReady = false return } + // Register CUDA provider if available + if cudaLibPath != "" { + if err := onnxruntime_go.RegisterExecutionProviderLibrary("CUDA", cudaLibPath); err != nil { + e.logger.Warn("failed to register CUDA provider", "error", err) + } + } onnxReady = true }) if !onnxReady { return errors.New("ONNX runtime not ready") } + + // Create session options + opts, err := onnxruntime_go.NewSessionOptions() + if err != nil { + return fmt.Errorf("failed to create session options: %w", err) + } + defer opts.Destroy() + + // Try to add CUDA provider + useCUDA := cudaLibPath != "" + if useCUDA { + cudaOpts, err := onnxruntime_go.NewCUDAProviderOptions() + if err != nil { + e.logger.Warn("failed to create CUDA provider options, falling back to CPU", "error", err) + useCUDA = false + } else { + defer cudaOpts.Destroy() + if err := cudaOpts.Update(map[string]string{"device_id": "0"}); err != nil { + e.logger.Warn("failed to update CUDA options, falling back to CPU", "error", err) + useCUDA = false + } else if err := opts.AppendExecutionProviderCUDA(cudaOpts); err != nil { + e.logger.Warn("failed to append CUDA provider, falling back to CPU", "error", err) + useCUDA = false + } + } + } + + if useCUDA { + e.logger.Info("Using CUDA for ONNX inference") + } else { + e.logger.Info("Using CPU for ONNX inference") + } + + // Create session with options session, err := onnxruntime_go.NewDynamicAdvancedSession( e.getModelPath(), []string{"input_ids", "attention_mask"}, []string{"sentence_embedding"}, - nil, + opts, ) if err != nil { return fmt.Errorf("failed to create ONNX session: %w", err) @@ -304,6 +367,9 @@ func (e 
*ONNXEmbedder) Embed(text string) ([]float32, error) { } func (e *ONNXEmbedder) EmbedSlice(texts []string) ([][]float32, error) { + if err := e.ensureInitialized(); err != nil { + return nil, err + } encodings := make([]*tokenizer.Encoding, len(texts)) maxLen := 0 for i, txt := range texts {