Enha: rag tuning and tests
This commit is contained in:
131
rag/rag_real_test.go
Normal file
131
rag/rag_real_test.go
Normal file
@@ -0,0 +1,131 @@
|
||||
package rag
|
||||
|
||||
import (
|
||||
"gf-lt/config"
|
||||
"gf-lt/storage"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestRealBiblicalQuery(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("skipping real embedder test in short mode")
|
||||
}
|
||||
// Check if the embedder model exists
|
||||
modelPath := filepath.Join("..", "onnx", "embedgemma", "model_q4.onnx")
|
||||
if _, err := os.Stat(modelPath); os.IsNotExist(err) {
|
||||
t.Skipf("embedder model not found at %s; skipping real embedder test", modelPath)
|
||||
}
|
||||
tokenizerPath := filepath.Join("..", "onnx", "embedgemma", "tokenizer.json")
|
||||
dbPath := filepath.Join("..", "gflt.db")
|
||||
if _, err := os.Stat(dbPath); os.IsNotExist(err) {
|
||||
t.Skipf("database not found at %s; skipping real embedder test", dbPath)
|
||||
}
|
||||
cfg := &config.Config{
|
||||
EmbedModelPath: modelPath,
|
||||
EmbedTokenizerPath: tokenizerPath,
|
||||
EmbedDims: 768,
|
||||
RAGWordLimit: 250,
|
||||
RAGOverlapWords: 25,
|
||||
RAGBatchSize: 1,
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(nil, &slog.HandlerOptions{Level: slog.LevelError}))
|
||||
store := storage.NewProviderSQL(dbPath, logger)
|
||||
if store == nil {
|
||||
t.Fatal("failed to create storage provider")
|
||||
}
|
||||
rag, err := New(logger, store, cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create RAG instance: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { rag.Destroy() })
|
||||
|
||||
query := "bald prophet and two she bears"
|
||||
results, err := rag.Search(query, 30)
|
||||
if err != nil {
|
||||
t.Fatalf("search failed: %v", err)
|
||||
}
|
||||
found := false
|
||||
for i, row := range results {
|
||||
if row.Slug == "kjv_bible.epub_1786_0" {
|
||||
found = true
|
||||
t.Logf("target chunk found at rank %d", i+1)
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Errorf("target chunk not found in search results for query %q", query)
|
||||
t.Logf("results slugs:")
|
||||
for i, r := range results {
|
||||
t.Logf("%d: %s", i+1, r.Slug)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRealQueryVariations(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("skipping real embedder test in short mode")
|
||||
}
|
||||
modelPath := filepath.Join("..", "onnx", "embedgemma", "model_q4.onnx")
|
||||
if _, err := os.Stat(modelPath); os.IsNotExist(err) {
|
||||
t.Skipf("embedder model not found at %s; skipping real embedder test", modelPath)
|
||||
}
|
||||
tokenizerPath := filepath.Join("..", "onnx", "embedgemma", "tokenizer.json")
|
||||
dbPath := filepath.Join("..", "gflt.db")
|
||||
if _, err := os.Stat(dbPath); os.IsNotExist(err) {
|
||||
t.Skipf("database not found at %s; skipping real embedder test", dbPath)
|
||||
}
|
||||
cfg := &config.Config{
|
||||
EmbedModelPath: modelPath,
|
||||
EmbedTokenizerPath: tokenizerPath,
|
||||
EmbedDims: 768,
|
||||
RAGWordLimit: 250,
|
||||
RAGOverlapWords: 25,
|
||||
RAGBatchSize: 1,
|
||||
}
|
||||
logger := slog.New(slog.NewTextHandler(nil, &slog.HandlerOptions{Level: slog.LevelError}))
|
||||
store := storage.NewProviderSQL(dbPath, logger)
|
||||
if store == nil {
|
||||
t.Fatal("failed to create storage provider")
|
||||
}
|
||||
rag, err := New(logger, store, cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create RAG instance: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { rag.Destroy() })
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
query string
|
||||
}{
|
||||
{"she bears", "she bears"},
|
||||
{"bald head", "bald head"},
|
||||
{"two she bears out of the wood", "two she bears out of the wood"},
|
||||
{"bald prophet", "bald prophet"},
|
||||
{"go up thou bald head", "\"go up thou bald head\""},
|
||||
{"two she bears", "\"two she bears\""},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
results, err := rag.Search(tt.query, 10)
|
||||
if err != nil {
|
||||
t.Fatalf("search failed: %v", err)
|
||||
}
|
||||
found := false
|
||||
for _, row := range results {
|
||||
if row.Slug == "kjv_bible.epub_1786_0" {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Errorf("target chunk not found for query %q", tt.query)
|
||||
for i, r := range results {
|
||||
t.Logf("%d: %s", i+1, r.Slug)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user