Enhancement: migrations with separate embedding tables for different vector sizes

This commit is contained in:
Grail Finder
2025-11-24 10:44:12 +03:00
parent 4774ea48db
commit 3b64baf9eb
5 changed files with 123 additions and 54 deletions

View File

@@ -1,10 +1,34 @@
-- Drop vector storage tables.
-- Statements are grouped per table: its three indexes first, then the table.
-- Every statement uses IF EXISTS, so a missing object is skipped, not an error.

-- embeddings_384
DROP INDEX IF EXISTS idx_embeddings_384_filename;
DROP INDEX IF EXISTS idx_embeddings_384_slug;
DROP INDEX IF EXISTS idx_embeddings_384_created_at;
DROP TABLE IF EXISTS embeddings_384;

-- embeddings_768
DROP INDEX IF EXISTS idx_embeddings_768_filename;
DROP INDEX IF EXISTS idx_embeddings_768_slug;
DROP INDEX IF EXISTS idx_embeddings_768_created_at;
DROP TABLE IF EXISTS embeddings_768;

-- embeddings_1024
DROP INDEX IF EXISTS idx_embeddings_1024_filename;
DROP INDEX IF EXISTS idx_embeddings_1024_slug;
DROP INDEX IF EXISTS idx_embeddings_1024_created_at;
DROP TABLE IF EXISTS embeddings_1024;

-- embeddings_1536
DROP INDEX IF EXISTS idx_embeddings_1536_filename;
DROP INDEX IF EXISTS idx_embeddings_1536_slug;
DROP INDEX IF EXISTS idx_embeddings_1536_created_at;
DROP TABLE IF EXISTS embeddings_1536;

-- embeddings_2048
DROP INDEX IF EXISTS idx_embeddings_2048_filename;
DROP INDEX IF EXISTS idx_embeddings_2048_slug;
DROP INDEX IF EXISTS idx_embeddings_2048_created_at;
DROP TABLE IF EXISTS embeddings_2048;

-- embeddings_3072
DROP INDEX IF EXISTS idx_embeddings_3072_filename;
DROP INDEX IF EXISTS idx_embeddings_3072_slug;
DROP INDEX IF EXISTS idx_embeddings_3072_created_at;
DROP TABLE IF EXISTS embeddings_3072;

-- embeddings_4096
DROP INDEX IF EXISTS idx_embeddings_4096_filename;
DROP INDEX IF EXISTS idx_embeddings_4096_slug;
DROP INDEX IF EXISTS idx_embeddings_4096_created_at;
DROP TABLE IF EXISTS embeddings_4096;

-- embeddings_5120
DROP INDEX IF EXISTS idx_embeddings_5120_filename;
DROP INDEX IF EXISTS idx_embeddings_5120_slug;
DROP INDEX IF EXISTS idx_embeddings_5120_created_at;
DROP TABLE IF EXISTS embeddings_5120;

View File

@@ -8,6 +8,60 @@ CREATE TABLE IF NOT EXISTS embeddings_384 (
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Embedding tables for 768- through 4096-element vectors.
-- All of them share one column layout; only the numeric suffix of the table
-- name differs:
--   id         auto-incrementing integer primary key
--   embeddings required BLOB (presumably the serialized vector -- confirm
--              against the code that writes it)
--   slug       required text
--   raw_text   required text
--   filename   required text, defaults to the empty string
--   created_at timestamp, defaults to the time of insertion
-- IF NOT EXISTS makes each statement a no-op when the table is already there.

-- 768-element embeddings.
CREATE TABLE IF NOT EXISTS embeddings_768 (
id INTEGER PRIMARY KEY AUTOINCREMENT,
embeddings BLOB NOT NULL,
slug TEXT NOT NULL,
raw_text TEXT NOT NULL,
filename TEXT NOT NULL DEFAULT '',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- 1024-element embeddings.
CREATE TABLE IF NOT EXISTS embeddings_1024 (
id INTEGER PRIMARY KEY AUTOINCREMENT,
embeddings BLOB NOT NULL,
slug TEXT NOT NULL,
raw_text TEXT NOT NULL,
filename TEXT NOT NULL DEFAULT '',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- 1536-element embeddings.
CREATE TABLE IF NOT EXISTS embeddings_1536 (
id INTEGER PRIMARY KEY AUTOINCREMENT,
embeddings BLOB NOT NULL,
slug TEXT NOT NULL,
raw_text TEXT NOT NULL,
filename TEXT NOT NULL DEFAULT '',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- 2048-element embeddings.
CREATE TABLE IF NOT EXISTS embeddings_2048 (
id INTEGER PRIMARY KEY AUTOINCREMENT,
embeddings BLOB NOT NULL,
slug TEXT NOT NULL,
raw_text TEXT NOT NULL,
filename TEXT NOT NULL DEFAULT '',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- 3072-element embeddings.
CREATE TABLE IF NOT EXISTS embeddings_3072 (
id INTEGER PRIMARY KEY AUTOINCREMENT,
embeddings BLOB NOT NULL,
slug TEXT NOT NULL,
raw_text TEXT NOT NULL,
filename TEXT NOT NULL DEFAULT '',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- 4096-element embeddings.
CREATE TABLE IF NOT EXISTS embeddings_4096 (
id INTEGER PRIMARY KEY AUTOINCREMENT,
embeddings BLOB NOT NULL,
slug TEXT NOT NULL,
raw_text TEXT NOT NULL,
filename TEXT NOT NULL DEFAULT '',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS embeddings_5120 (
id INTEGER PRIMARY KEY AUTOINCREMENT,
embeddings BLOB NOT NULL,
@@ -19,8 +73,26 @@ CREATE TABLE IF NOT EXISTS embeddings_5120 (
-- Indexes for better performance.
-- Each embeddings table gets the same three single-column indexes
-- (filename, slug, created_at); statements are grouped per table.

-- embeddings_384
CREATE INDEX IF NOT EXISTS idx_embeddings_384_filename ON embeddings_384(filename);
CREATE INDEX IF NOT EXISTS idx_embeddings_384_slug ON embeddings_384(slug);
CREATE INDEX IF NOT EXISTS idx_embeddings_384_created_at ON embeddings_384(created_at);

-- embeddings_768
CREATE INDEX IF NOT EXISTS idx_embeddings_768_filename ON embeddings_768(filename);
CREATE INDEX IF NOT EXISTS idx_embeddings_768_slug ON embeddings_768(slug);
CREATE INDEX IF NOT EXISTS idx_embeddings_768_created_at ON embeddings_768(created_at);

-- embeddings_1024
CREATE INDEX IF NOT EXISTS idx_embeddings_1024_filename ON embeddings_1024(filename);
CREATE INDEX IF NOT EXISTS idx_embeddings_1024_slug ON embeddings_1024(slug);
CREATE INDEX IF NOT EXISTS idx_embeddings_1024_created_at ON embeddings_1024(created_at);

-- embeddings_1536
CREATE INDEX IF NOT EXISTS idx_embeddings_1536_filename ON embeddings_1536(filename);
CREATE INDEX IF NOT EXISTS idx_embeddings_1536_slug ON embeddings_1536(slug);
CREATE INDEX IF NOT EXISTS idx_embeddings_1536_created_at ON embeddings_1536(created_at);

-- embeddings_2048
CREATE INDEX IF NOT EXISTS idx_embeddings_2048_filename ON embeddings_2048(filename);
CREATE INDEX IF NOT EXISTS idx_embeddings_2048_slug ON embeddings_2048(slug);
CREATE INDEX IF NOT EXISTS idx_embeddings_2048_created_at ON embeddings_2048(created_at);

-- embeddings_3072
CREATE INDEX IF NOT EXISTS idx_embeddings_3072_filename ON embeddings_3072(filename);
CREATE INDEX IF NOT EXISTS idx_embeddings_3072_slug ON embeddings_3072(slug);
CREATE INDEX IF NOT EXISTS idx_embeddings_3072_created_at ON embeddings_3072(created_at);

-- embeddings_4096
CREATE INDEX IF NOT EXISTS idx_embeddings_4096_filename ON embeddings_4096(filename);
CREATE INDEX IF NOT EXISTS idx_embeddings_4096_slug ON embeddings_4096(slug);
CREATE INDEX IF NOT EXISTS idx_embeddings_4096_created_at ON embeddings_4096(created_at);

-- embeddings_5120
CREATE INDEX IF NOT EXISTS idx_embeddings_5120_filename ON embeddings_5120(filename);
CREATE INDEX IF NOT EXISTS idx_embeddings_5120_slug ON embeddings_5120(slug);
CREATE INDEX IF NOT EXISTS idx_embeddings_5120_created_at ON embeddings_5120(created_at);

View File

@@ -45,17 +45,24 @@ func mathBitsToFloat32(b uint32) float32 {
return *(*float32)(unsafe.Pointer(&b))
}
// Names of the vector storage tables, one per embedding length.
var (
	// vecTableName384 is the table used for 384-element embeddings.
	vecTableName384 = "embeddings_384"
	// vecTableName5120 is the table used for 5120-element embeddings.
	vecTableName5120 = "embeddings_5120"
)
func fetchTableName(emb []float32) (string, error) {
switch len(emb) {
case 5120:
return vecTableName5120, nil
case 384:
return vecTableName384, nil
return "embeddings_384", nil
case 768:
return "embeddings_768", nil
case 1024:
return "embeddings_1024", nil
case 1536:
return "embeddings_1536", nil
case 2048:
return "embeddings_2048", nil
case 3072:
return "embeddings_3072", nil
case 4096:
return "embeddings_4096", nil
case 5120:
return "embeddings_5120", nil
default:
return "", fmt.Errorf("no table for the size of %d", len(emb))
}
@@ -185,8 +192,12 @@ func sqrt(f float32) float32 {
func (p ProviderSQL) ListFiles() ([]string, error) {
fileLists := make([][]string, 0)
// Query both tables and combine results
for _, table := range []string{vecTableName384, vecTableName5120} {
// Query all supported tables and combine results
tableNames := []string{
"embeddings_384", "embeddings_768", "embeddings_1024", "embeddings_1536",
"embeddings_2048", "embeddings_3072", "embeddings_4096", "embeddings_5120",
}
for _, table := range tableNames {
query := "SELECT DISTINCT filename FROM " + table
rows, err := p.db.Query(query)
if err != nil {
@@ -225,7 +236,11 @@ func (p ProviderSQL) ListFiles() ([]string, error) {
func (p ProviderSQL) RemoveEmbByFileName(filename string) error {
var errors []string
for _, table := range []string{vecTableName384, vecTableName5120} {
tableNames := []string{
"embeddings_384", "embeddings_768", "embeddings_1024", "embeddings_1536",
"embeddings_2048", "embeddings_3072", "embeddings_4096", "embeddings_5120",
}
for _, table := range tableNames {
query := fmt.Sprintf("DELETE FROM %s WHERE filename = ?", table)
if _, err := p.db.Exec(query, filename); err != nil {
errors = append(errors, err.Error())