Fix: slow startup from silly migrations
This commit is contained in:
28
bot.go
28
bot.go
@@ -1529,21 +1529,23 @@ func init() {
|
|||||||
asr = NewSTT(logger, cfg)
|
asr = NewSTT(logger, cfg)
|
||||||
}
|
}
|
||||||
if cfg.PlaywrightEnabled {
|
if cfg.PlaywrightEnabled {
|
||||||
if err := checkPlaywright(); err != nil {
|
go func() {
|
||||||
// slow; need a faster way to check whether playwright is installed
|
|
||||||
if err := installPW(); err != nil {
|
|
||||||
logger.Error("failed to install playwright", "error", err)
|
|
||||||
cancel()
|
|
||||||
os.Exit(1)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if err := checkPlaywright(); err != nil {
|
if err := checkPlaywright(); err != nil {
|
||||||
logger.Error("failed to run playwright", "error", err)
|
// slow; need a faster way to check whether playwright is installed
|
||||||
cancel()
|
if err := installPW(); err != nil {
|
||||||
os.Exit(1)
|
logger.Error("failed to install playwright", "error", err)
|
||||||
return
|
cancel()
|
||||||
|
os.Exit(1)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := checkPlaywright(); err != nil {
|
||||||
|
logger.Error("failed to run playwright", "error", err)
|
||||||
|
cancel()
|
||||||
|
os.Exit(1)
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}()
|
||||||
}
|
}
|
||||||
// atomic default values
|
// atomic default values
|
||||||
cachedModelColor.Store("orange")
|
cachedModelColor.Store("orange")
|
||||||
|
|||||||
@@ -23,9 +23,20 @@ func (p *ProviderSQL) Migrate() error {
|
|||||||
p.logger.Error("Failed to read migrations directory;", "error", err)
|
p.logger.Error("Failed to read migrations directory;", "error", err)
|
||||||
return fmt.Errorf("failed to read migrations directory: %w", err)
|
return fmt.Errorf("failed to read migrations directory: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if FTS already has data - skip populate migration if so
|
||||||
|
var ftsCount int
|
||||||
|
_ = p.db.QueryRow("SELECT COUNT(*) FROM fts_embeddings").Scan(&ftsCount)
|
||||||
|
skipFTSMigration := ftsCount > 0
|
||||||
|
|
||||||
// Execute each .up.sql file
|
// Execute each .up.sql file
|
||||||
for _, file := range files {
|
for _, file := range files {
|
||||||
if strings.HasSuffix(file.Name(), ".up.sql") {
|
if strings.HasSuffix(file.Name(), ".up.sql") {
|
||||||
|
// Skip FTS populate migration if already populated
|
||||||
|
if skipFTSMigration && strings.Contains(file.Name(), "004_populate_fts") {
|
||||||
|
p.logger.Debug("Skipping FTS migration - already populated", "file", file.Name())
|
||||||
|
continue
|
||||||
|
}
|
||||||
err := p.executeMigration(migrationsDir, file.Name())
|
err := p.executeMigration(migrationsDir, file.Name())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
p.logger.Error("Failed to execute migration %s: %v", file.Name(), err)
|
p.logger.Error("Failed to execute migration %s: %v", file.Name(), err)
|
||||||
|
|||||||
@@ -1,26 +1,4 @@
|
|||||||
-- Populate FTS table with existing embeddings
|
-- Populate FTS table with existing embeddings (incremental - only inserts missing rows)
|
||||||
DELETE FROM fts_embeddings;
|
-- Only the 768-dimension embeddings are in use, so populate from embeddings_768 only
|
||||||
|
INSERT OR IGNORE INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
||||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
SELECT slug, raw_text, filename, 768 FROM embeddings_768;
|
||||||
SELECT slug, raw_text, filename, 384 FROM embeddings_384;
|
|
||||||
|
|
||||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
|
||||||
SELECT slug, raw_text, filename, 768 FROM embeddings_768;
|
|
||||||
|
|
||||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
|
||||||
SELECT slug, raw_text, filename, 1024 FROM embeddings_1024;
|
|
||||||
|
|
||||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
|
||||||
SELECT slug, raw_text, filename, 1536 FROM embeddings_1536;
|
|
||||||
|
|
||||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
|
||||||
SELECT slug, raw_text, filename, 2048 FROM embeddings_2048;
|
|
||||||
|
|
||||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
|
||||||
SELECT slug, raw_text, filename, 3072 FROM embeddings_3072;
|
|
||||||
|
|
||||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
|
||||||
SELECT slug, raw_text, filename, 4096 FROM embeddings_4096;
|
|
||||||
|
|
||||||
INSERT INTO fts_embeddings (slug, raw_text, filename, embedding_size)
|
|
||||||
SELECT slug, raw_text, filename, 5120 FROM embeddings_5120;
|
|
||||||
87
storage/migrations/005_drop_unused_embeddings.down.sql
Normal file
87
storage/migrations/005_drop_unused_embeddings.down.sql
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
-- Recreate unused embedding tables (for rollback)
|
||||||
|
CREATE TABLE IF NOT EXISTS embeddings_384 (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
embeddings BLOB NOT NULL,
|
||||||
|
slug TEXT NOT NULL,
|
||||||
|
raw_text TEXT NOT NULL,
|
||||||
|
filename TEXT NOT NULL,
|
||||||
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS embeddings_1024 (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
embeddings BLOB NOT NULL,
|
||||||
|
slug TEXT NOT NULL,
|
||||||
|
raw_text TEXT NOT NULL,
|
||||||
|
filename TEXT NOT NULL,
|
||||||
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS embeddings_1536 (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
embeddings BLOB NOT NULL,
|
||||||
|
slug TEXT NOT NULL,
|
||||||
|
raw_text TEXT NOT NULL,
|
||||||
|
filename TEXT NOT NULL,
|
||||||
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS embeddings_2048 (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
embeddings BLOB NOT NULL,
|
||||||
|
slug TEXT NOT NULL,
|
||||||
|
raw_text TEXT NOT NULL,
|
||||||
|
filename TEXT NOT NULL,
|
||||||
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS embeddings_3072 (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
embeddings BLOB NOT NULL,
|
||||||
|
slug TEXT NOT NULL,
|
||||||
|
raw_text TEXT NOT NULL,
|
||||||
|
filename TEXT NOT NULL,
|
||||||
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS embeddings_4096 (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
embeddings BLOB NOT NULL,
|
||||||
|
slug TEXT NOT NULL,
|
||||||
|
raw_text TEXT NOT NULL,
|
||||||
|
filename TEXT NOT NULL,
|
||||||
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS embeddings_5120 (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
embeddings BLOB NOT NULL,
|
||||||
|
slug TEXT NOT NULL,
|
||||||
|
raw_text TEXT NOT NULL,
|
||||||
|
filename TEXT NOT NULL,
|
||||||
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_embeddings_384_filename ON embeddings_384(filename);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_embeddings_1024_filename ON embeddings_1024(filename);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_embeddings_1536_filename ON embeddings_1536(filename);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_embeddings_2048_filename ON embeddings_2048(filename);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_embeddings_3072_filename ON embeddings_3072(filename);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_embeddings_4096_filename ON embeddings_4096(filename);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_embeddings_5120_filename ON embeddings_5120(filename);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_embeddings_384_slug ON embeddings_384(slug);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_embeddings_1024_slug ON embeddings_1024(slug);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_embeddings_1536_slug ON embeddings_1536(slug);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_embeddings_2048_slug ON embeddings_2048(slug);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_embeddings_3072_slug ON embeddings_3072(slug);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_embeddings_4096_slug ON embeddings_4096(slug);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_embeddings_5120_slug ON embeddings_5120(slug);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_embeddings_384_created_at ON embeddings_384(created_at);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_embeddings_1024_created_at ON embeddings_1024(created_at);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_embeddings_1536_created_at ON embeddings_1536(created_at);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_embeddings_2048_created_at ON embeddings_2048(created_at);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_embeddings_3072_created_at ON embeddings_3072(created_at);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_embeddings_4096_created_at ON embeddings_4096(created_at);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_embeddings_5120_created_at ON embeddings_5120(created_at);
|
||||||
32
storage/migrations/005_drop_unused_embeddings.up.sql
Normal file
32
storage/migrations/005_drop_unused_embeddings.up.sql
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
-- Drop unused embedding tables (we only use 768)
|
||||||
|
DROP INDEX IF EXISTS idx_embeddings_384_filename;
|
||||||
|
DROP INDEX IF EXISTS idx_embeddings_1024_filename;
|
||||||
|
DROP INDEX IF EXISTS idx_embeddings_1536_filename;
|
||||||
|
DROP INDEX IF EXISTS idx_embeddings_2048_filename;
|
||||||
|
DROP INDEX IF EXISTS idx_embeddings_3072_filename;
|
||||||
|
DROP INDEX IF EXISTS idx_embeddings_4096_filename;
|
||||||
|
DROP INDEX IF EXISTS idx_embeddings_5120_filename;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS idx_embeddings_384_slug;
|
||||||
|
DROP INDEX IF EXISTS idx_embeddings_1024_slug;
|
||||||
|
DROP INDEX IF EXISTS idx_embeddings_1536_slug;
|
||||||
|
DROP INDEX IF EXISTS idx_embeddings_2048_slug;
|
||||||
|
DROP INDEX IF EXISTS idx_embeddings_3072_slug;
|
||||||
|
DROP INDEX IF EXISTS idx_embeddings_4096_slug;
|
||||||
|
DROP INDEX IF EXISTS idx_embeddings_5120_slug;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS idx_embeddings_384_created_at;
|
||||||
|
DROP INDEX IF EXISTS idx_embeddings_1024_created_at;
|
||||||
|
DROP INDEX IF EXISTS idx_embeddings_1536_created_at;
|
||||||
|
DROP INDEX IF EXISTS idx_embeddings_2048_created_at;
|
||||||
|
DROP INDEX IF EXISTS idx_embeddings_3072_created_at;
|
||||||
|
DROP INDEX IF EXISTS idx_embeddings_4096_created_at;
|
||||||
|
DROP INDEX IF EXISTS idx_embeddings_5120_created_at;
|
||||||
|
|
||||||
|
DROP TABLE IF EXISTS embeddings_384;
|
||||||
|
DROP TABLE IF EXISTS embeddings_1024;
|
||||||
|
DROP TABLE IF EXISTS embeddings_1536;
|
||||||
|
DROP TABLE IF EXISTS embeddings_2048;
|
||||||
|
DROP TABLE IF EXISTS embeddings_3072;
|
||||||
|
DROP TABLE IF EXISTS embeddings_4096;
|
||||||
|
DROP TABLE IF EXISTS embeddings_5120;
|
||||||
@@ -48,22 +48,8 @@ func mathBitsToFloat32(b uint32) float32 {
|
|||||||
|
|
||||||
func fetchTableName(emb []float32) (string, error) {
|
func fetchTableName(emb []float32) (string, error) {
|
||||||
switch len(emb) {
|
switch len(emb) {
|
||||||
case 384:
|
|
||||||
return "embeddings_384", nil
|
|
||||||
case 768:
|
case 768:
|
||||||
return "embeddings_768", nil
|
return "embeddings_768", nil
|
||||||
case 1024:
|
|
||||||
return "embeddings_1024", nil
|
|
||||||
case 1536:
|
|
||||||
return "embeddings_1536", nil
|
|
||||||
case 2048:
|
|
||||||
return "embeddings_2048", nil
|
|
||||||
case 3072:
|
|
||||||
return "embeddings_3072", nil
|
|
||||||
case 4096:
|
|
||||||
return "embeddings_4096", nil
|
|
||||||
case 5120:
|
|
||||||
return "embeddings_5120", nil
|
|
||||||
default:
|
default:
|
||||||
return "", fmt.Errorf("no table for the size of %d", len(emb))
|
return "", fmt.Errorf("no table for the size of %d", len(emb))
|
||||||
}
|
}
|
||||||
@@ -170,62 +156,26 @@ func sqrt(f float32) float32 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (p ProviderSQL) ListFiles() ([]string, error) {
|
func (p ProviderSQL) ListFiles() ([]string, error) {
|
||||||
fileLists := make([][]string, 0)
|
query := "SELECT DISTINCT filename FROM embeddings_768"
|
||||||
|
rows, err := p.db.Query(query)
|
||||||
// Query all supported tables and combine results
|
if err != nil {
|
||||||
tableNames := []string{
|
return nil, err
|
||||||
"embeddings_384", "embeddings_768", "embeddings_1024", "embeddings_1536",
|
|
||||||
"embeddings_2048", "embeddings_3072", "embeddings_4096", "embeddings_5120",
|
|
||||||
}
|
}
|
||||||
for _, table := range tableNames {
|
defer rows.Close()
|
||||||
query := "SELECT DISTINCT filename FROM " + table
|
|
||||||
rows, err := p.db.Query(query)
|
var allFiles []string
|
||||||
if err != nil {
|
for rows.Next() {
|
||||||
// Continue if one table doesn't exist
|
var filename string
|
||||||
|
if err := rows.Scan(&filename); err != nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
allFiles = append(allFiles, filename)
|
||||||
var files []string
|
|
||||||
for rows.Next() {
|
|
||||||
var filename string
|
|
||||||
if err := rows.Scan(&filename); err != nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
files = append(files, filename)
|
|
||||||
}
|
|
||||||
rows.Close()
|
|
||||||
|
|
||||||
fileLists = append(fileLists, files)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Combine and deduplicate
|
|
||||||
fileSet := make(map[string]bool)
|
|
||||||
var allFiles []string
|
|
||||||
for _, files := range fileLists {
|
|
||||||
for _, file := range files {
|
|
||||||
if !fileSet[file] {
|
|
||||||
fileSet[file] = true
|
|
||||||
allFiles = append(allFiles, file)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return allFiles, nil
|
return allFiles, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p ProviderSQL) RemoveEmbByFileName(filename string) error {
|
func (p ProviderSQL) RemoveEmbByFileName(filename string) error {
|
||||||
var errors []string
|
query := "DELETE FROM embeddings_768 WHERE filename = ?"
|
||||||
tableNames := []string{
|
_, err := p.db.Exec(query, filename)
|
||||||
"embeddings_384", "embeddings_768", "embeddings_1024", "embeddings_1536",
|
return err
|
||||||
"embeddings_2048", "embeddings_3072", "embeddings_4096", "embeddings_5120",
|
|
||||||
}
|
|
||||||
for _, table := range tableNames {
|
|
||||||
query := fmt.Sprintf("DELETE FROM %s WHERE filename = ?", table)
|
|
||||||
if _, err := p.db.Exec(query, filename); err != nil {
|
|
||||||
errors = append(errors, err.Error())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if len(errors) > 0 {
|
|
||||||
return fmt.Errorf("errors occurred: %v", errors)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user