Enha: migrations with different embedding tables

This commit is contained in:
Grail Finder
2025-11-24 10:44:12 +03:00
parent 4774ea48db
commit 3b64baf9eb
5 changed files with 123 additions and 54 deletions

View File

@@ -43,10 +43,7 @@ func New(l *slog.Logger, s storage.FullRepo, cfg *config.Config) *RAG {
storage: NewVectorStorage(l, s),
}
// Create the necessary tables
if err := rag.storage.CreateTables(); err != nil {
l.Error("failed to create vector tables", "error", err)
}
// Note: Vector tables are created via database migrations, not at runtime
return rag
}

View File

@@ -28,45 +28,6 @@ func NewVectorStorage(logger *slog.Logger, store storage.FullRepo) *VectorStorag
}
}
// CreateTables creates the tables and indexes used for vector storage.
//
// For each supported embedding dimension it issues one CREATE TABLE for
// embeddings_<size>, followed (after all tables have been issued) by one
// CREATE INDEX per indexed column: filename, slug and created_at. Every
// statement uses IF NOT EXISTS.
//
// Statements are executed sequentially through vs.sqlxDB. Execution stops at
// the first failure, and the returned error wraps the driver error and names
// the table or index whose creation failed.
func (vs *VectorStorage) CreateTables() error {
	// Embedding dimensions that get a dedicated table.
	embeddingSizes := []int{384, 768, 1024, 1536, 2048, 3072, 4096, 5120}
	// Columns that receive a single-column index on every embeddings table.
	indexedColumns := []string{"filename", "slug", "created_at"}

	// statement pairs a query with a human-readable label so that a failure
	// can be attributed to the exact object being created.
	type statement struct {
		target string
		query  string
	}

	// Each size contributes one table plus one index per indexed column.
	statements := make([]statement, 0, len(embeddingSizes)*(1+len(indexedColumns)))

	// Queue all tables first so every index statement finds its table.
	for _, size := range embeddingSizes {
		tableName := fmt.Sprintf("embeddings_%d", size)
		statements = append(statements, statement{
			target: "table " + tableName,
			query: fmt.Sprintf(`CREATE TABLE IF NOT EXISTS %s (
				id INTEGER PRIMARY KEY AUTOINCREMENT,
				embeddings BLOB NOT NULL,
				slug TEXT NOT NULL,
				raw_text TEXT NOT NULL,
				filename TEXT NOT NULL,
				created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
			)`, tableName),
		})
	}

	// Then queue the indexes for all supported sizes.
	for _, size := range embeddingSizes {
		tableName := fmt.Sprintf("embeddings_%d", size)
		for _, column := range indexedColumns {
			indexName := fmt.Sprintf("idx_%s_%s", tableName, column)
			statements = append(statements, statement{
				target: "index " + indexName,
				query:  fmt.Sprintf(`CREATE INDEX IF NOT EXISTS %s ON %s(%s)`, indexName, tableName, column),
			})
		}
	}

	for _, stmt := range statements {
		if _, err := vs.sqlxDB.Exec(stmt.query); err != nil {
			// Name the failing object: the old message said "table" even
			// when an index statement was the one that failed.
			return fmt.Errorf("failed to create %s: %w", stmt.target, err)
		}
	}
	return nil
}
// SerializeVector converts []float32 to binary blob
func SerializeVector(vec []float32) []byte {