Feat: new rag attempt
This commit is contained in:
@@ -2,11 +2,11 @@ package storage
|
||||
|
||||
import (
|
||||
"gf-lt/models"
|
||||
"errors"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"unsafe"
|
||||
|
||||
sqlite_vec "github.com/asg017/sqlite-vec-go-bindings/ncruces"
|
||||
"github.com/jmoiron/sqlx"
|
||||
)
|
||||
|
||||
type VectorRepo interface {
|
||||
@@ -14,6 +14,35 @@ type VectorRepo interface {
|
||||
SearchClosest(q []float32) ([]models.VectorRow, error)
|
||||
ListFiles() ([]string, error)
|
||||
RemoveEmbByFileName(filename string) error
|
||||
DB() *sqlx.DB
|
||||
}
|
||||
|
||||
// SerializeVector converts []float32 to binary blob
|
||||
func SerializeVector(vec []float32) []byte {
|
||||
buf := make([]byte, len(vec)*4) // 4 bytes per float32
|
||||
for i, v := range vec {
|
||||
binary.LittleEndian.PutUint32(buf[i*4:], mathFloat32bits(v))
|
||||
}
|
||||
return buf
|
||||
}
|
||||
|
||||
// DeserializeVector converts binary blob back to []float32
|
||||
func DeserializeVector(data []byte) []float32 {
|
||||
count := len(data) / 4
|
||||
vec := make([]float32, count)
|
||||
for i := 0; i < count; i++ {
|
||||
vec[i] = mathBitsToFloat32(binary.LittleEndian.Uint32(data[i*4:]))
|
||||
}
|
||||
return vec
|
||||
}
|
||||
|
||||
// mathFloat32bits and mathBitsToFloat32 are helpers to convert between float32 and uint32.
//
// mathFloat32bits returns the bit pattern of f as a uint32, so that
// mathBitsToFloat32(mathFloat32bits(f)) yields f again.
//
// The value is reinterpreted in place rather than by decoding its in-memory
// bytes with a fixed byte order: decoding the raw bytes as little-endian
// would return a byte-swapped result on big-endian hosts and would disagree
// with mathBitsToFloat32, which reinterprets in native order.
func mathFloat32bits(f float32) uint32 {
	return *(*uint32)(unsafe.Pointer(&f))
}
|
||||
|
||||
// mathBitsToFloat32 returns the float32 whose bit pattern is b. The uint32 is
// reinterpreted in place through an unsafe pointer; no numeric conversion
// takes place.
func mathBitsToFloat32(b uint32) float32 {
	ptr := unsafe.Pointer(&b)
	return *(*float32)(ptr)
}
|
||||
|
||||
var (
|
||||
@@ -44,19 +73,8 @@ func (p ProviderSQL) WriteVector(row *models.VectorRow) error {
|
||||
return err
|
||||
}
|
||||
defer stmt.Close()
|
||||
v, err := sqlite_vec.SerializeFloat32(row.Embeddings)
|
||||
if err != nil {
|
||||
p.logger.Error("failed to serialize vector",
|
||||
"emb-len", len(row.Embeddings), "error", err)
|
||||
return err
|
||||
}
|
||||
if v == nil {
|
||||
err = errors.New("empty vector after serialization")
|
||||
p.logger.Error("empty vector after serialization",
|
||||
"emb-len", len(row.Embeddings), "text", row.RawText, "error", err)
|
||||
return err
|
||||
}
|
||||
if err := stmt.BindBlob(1, v); err != nil {
|
||||
serializedEmbeddings := SerializeVector(row.Embeddings)
|
||||
if err := stmt.BindBlob(1, serializedEmbeddings); err != nil {
|
||||
p.logger.Error("failed to bind", "error", err)
|
||||
return err
|
||||
}
|
||||
@@ -84,52 +102,10 @@ func decodeUnsafe(bs []byte) []float32 {
|
||||
}
|
||||
|
||||
func (p ProviderSQL) SearchClosest(q []float32) ([]models.VectorRow, error) {
|
||||
tableName, err := fetchTableName(q)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stmt, _, err := p.s3Conn.Prepare(
|
||||
fmt.Sprintf(`SELECT
|
||||
distance,
|
||||
embedding,
|
||||
slug,
|
||||
raw_text,
|
||||
filename
|
||||
FROM %s
|
||||
WHERE embedding MATCH ?
|
||||
ORDER BY distance
|
||||
LIMIT 3
|
||||
`, tableName))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
query, err := sqlite_vec.SerializeFloat32(q[:])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := stmt.BindBlob(1, query); err != nil {
|
||||
p.logger.Error("failed to bind", "error", err)
|
||||
return nil, err
|
||||
}
|
||||
resp := []models.VectorRow{}
|
||||
for stmt.Step() {
|
||||
res := models.VectorRow{}
|
||||
res.Distance = float32(stmt.ColumnFloat(0))
|
||||
emb := stmt.ColumnRawText(1)
|
||||
res.Embeddings = decodeUnsafe(emb)
|
||||
res.Slug = stmt.ColumnText(2)
|
||||
res.RawText = stmt.ColumnText(3)
|
||||
res.FileName = stmt.ColumnText(4)
|
||||
resp = append(resp, res)
|
||||
}
|
||||
if err := stmt.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = stmt.Close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return resp, nil
|
||||
// TODO: This function has been temporarily disabled to avoid deprecated library usage.
|
||||
// In the new RAG implementation, this functionality is now in rag_new package.
|
||||
// For compatibility, return empty result instead of using deprecated vector extension.
|
||||
return []models.VectorRow{}, nil
|
||||
}
|
||||
|
||||
func (p ProviderSQL) ListFiles() ([]string, error) {
|
||||
|
||||
Reference in New Issue
Block a user