Feat: llama.cpp model switch

bot.go | 29

@@ -65,6 +65,7 @@ var (
 		"google/gemma-3-27b-it:free",
 		"meta-llama/llama-3.3-70b-instruct:free",
 	}
+	LocalModels = []string{}
 )

 // cleanNullMessages removes messages with null or empty content to prevent API issues
@@ -187,7 +188,7 @@ func createClient(connectTimeout time.Duration) *http.Client {
 	}
 }

-func fetchLCPModelName() *models.LLMModels {
+func fetchLCPModelName() *models.LCPModels {
 	//nolint
 	resp, err := httpClient.Get(cfg.FetchModelNameAPI)
 	if err != nil {
@@ -199,7 +200,7 @@ func fetchLCPModelName() *models.LLMModels {
 		return nil
 	}
 	defer resp.Body.Close()
-	llmModel := models.LLMModels{}
+	llmModel := models.LCPModels{}
 	if err := json.NewDecoder(resp.Body).Decode(&llmModel); err != nil {
 		logger.Warn("failed to decode resp", "link", cfg.FetchModelNameAPI, "error", err)
 		return nil
@@ -255,6 +256,24 @@ func fetchORModels(free bool) ([]string, error) {
 	return freeModels, nil
 }

+func fetchLCPModels() ([]string, error) {
+	resp, err := http.Get(cfg.FetchModelNameAPI)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != 200 {
+		err := fmt.Errorf("failed to fetch llama.cpp models; status: %s", resp.Status)
+		return nil, err
+	}
+	data := &models.LCPModels{}
+	if err := json.NewDecoder(resp.Body).Decode(data); err != nil {
+		return nil, err
+	}
+	localModels := data.ListModels()
+	return localModels, nil
+}
+
 func sendMsgToLLM(body io.Reader) {
 	choseChunkParser()

@@ -869,6 +888,12 @@ func init() {
 			}
 		}()
 	}
+	go func() {
+		LocalModels, err = fetchLCPModels()
+		if err != nil {
+			logger.Error("failed to fetch llama.cpp models", "error", err)
+		}
+	}()
 	choseChunkParser()
 	httpClient = createClient(time.Second * 15)
 	if cfg.TTS_ENABLED {
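
Note: for reference, a minimal sketch of the JSON shape fetchLCPModels decodes from cfg.FetchModelNameAPI, assuming a llama.cpp-style model listing. The sample response body is made up; only the fields mirrored from models.LCPModels are used.

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Hypothetical response body; field names follow models.LCPModels.
	sample := []byte(`{
		"object": "list",
		"data": [{
			"id": "gemma-3-4b-it-Q4_K_M.gguf",
			"object": "model",
			"owned_by": "llama.cpp",
			"created": 0,
			"in_cache": true,
			"path": "/models/gemma-3-4b-it-Q4_K_M.gguf",
			"status": {"value": "loaded", "args": []}
		}]
	}`)
	var list struct {
		Data []struct {
			ID string `json:"id"`
		} `json:"data"`
	}
	if err := json.Unmarshal(sample, &list); err != nil {
		panic(err)
	}
	// Equivalent of LCPModels.ListModels: collect the model IDs.
	for _, m := range list.Data {
		fmt.Println(m.ID) // gemma-3-4b-it-Q4_K_M.gguf
	}
}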

llm.go | 2

@@ -157,7 +157,7 @@ func (lcp LCPCompletion) FormMsg(msg, role string, resume bool) (io.Reader, erro

 	logger.Debug("checking prompt for /completion", "tool_use", cfg.ToolUse,
 		"msg", msg, "resume", resume, "prompt", prompt, "multimodal_data_count", len(multimodalData))
-	payload := models.NewLCPReq(prompt, multimodalData, defaultLCPProps, chatBody.MakeStopSlice())
+	payload := models.NewLCPReq(prompt, chatBody.Model, multimodalData, defaultLCPProps, chatBody.MakeStopSlice())
 	data, err := json.Marshal(payload)
 	if err != nil {
 		logger.Error("failed to form a msg", "error", err)
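
Note: a hedged sketch of the /completion payload after this change, with the model name now serialized alongside the prompt. The struct here is trimmed to the fields this diff touches, and the model name is a placeholder.

package main

import (
	"encoding/json"
	"fmt"
)

// Trimmed request mirror; the real models.LlamaCPPReq carries more fields
// (temperature, stop strings, etc.).
type req struct {
	Model  string      `json:"model"`
	Stream bool        `json:"stream"`
	Prompt interface{} `json:"prompt"`
}

func main() {
	// "gemma-3-4b-it" stands in for whatever chatBody.Model holds.
	b, _ := json.Marshal(req{Model: "gemma-3-4b-it", Stream: true, Prompt: "Hello"})
	fmt.Println(string(b)) // {"model":"gemma-3-4b-it","stream":true,"prompt":"Hello"}
}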

main.go | 1

@@ -16,6 +16,7 @@ var (
 	selectedIndex = int(-1)
 	currentAPIIndex = 0 // Index to track current API in ApiLinks slice
 	currentORModelIndex = 0 // Index to track current OpenRouter model in ORFreeModels slice
+	currentLocalModelIndex = 0 // Index to track current llama.cpp model
 	shellMode = false
 	// indexLine = "F12 to show keys help | bot resp mode: [orange:-:b]%v[-:-:-] (F6) | card's char: [orange:-:b]%s[-:-:-] (ctrl+s) | chat: [orange:-:b]%s[-:-:-] (F1) | toolUseAdviced: [orange:-:b]%v[-:-:-] (ctrl+k) | model: [orange:-:b]%s[-:-:-] (ctrl+l) | skip LLM resp: [orange:-:b]%v[-:-:-] (F10)\nAPI_URL: [orange:-:b]%s[-:-:-] (ctrl+v) | ThinkUse: [orange:-:b]%v[-:-:-] (ctrl+p) | Log Level: [orange:-:b]%v[-:-:-] (ctrl+p) | Recording: [orange:-:b]%v[-:-:-] (ctrl+r) | Writing as: [orange:-:b]%s[-:-:-] (ctrl+q)"
 	indexLineCompletion = "F12 to show keys help | bot resp mode: [orange:-:b]%v[-:-:-] (F6) | card's char: [orange:-:b]%s[-:-:-] (ctrl+s) | chat: [orange:-:b]%s[-:-:-] (F1) | toolUseAdviced: [orange:-:b]%v[-:-:-] (ctrl+k) | model: [orange:-:b]%s[-:-:-] (ctrl+l) | skip LLM resp: [orange:-:b]%v[-:-:-] (F10)\nAPI_URL: [orange:-:b]%s[-:-:-] (ctrl+v) | Insert <think>: [orange:-:b]%v[-:-:-] (ctrl+p) | Log Level: [orange:-:b]%v[-:-:-] (ctrl+p) | Recording: [orange:-:b]%v[-:-:-] (ctrl+r) | Writing as: [orange:-:b]%s[-:-:-] (ctrl+q) | Bot will write as [orange:-:b]%s[-:-:-] (ctrl+x) | role_inject [orange:-:b]%v[-:-:-]"

@@ -420,25 +420,26 @@ type OpenAIReq struct {

 // ===

-type LLMModels struct {
-	Object string `json:"object"`
-	Data   []struct {
-		ID      string `json:"id"`
-		Object  string `json:"object"`
-		Created int    `json:"created"`
-		OwnedBy string `json:"owned_by"`
-		Meta    struct {
-			VocabType int   `json:"vocab_type"`
-			NVocab    int   `json:"n_vocab"`
-			NCtxTrain int   `json:"n_ctx_train"`
-			NEmbd     int   `json:"n_embd"`
-			NParams   int64 `json:"n_params"`
-			Size      int64 `json:"size"`
-		} `json:"meta"`
-	} `json:"data"`
-}
+// type LLMModels struct {
+// 	Object string `json:"object"`
+// 	Data   []struct {
+// 		ID      string `json:"id"`
+// 		Object  string `json:"object"`
+// 		Created int    `json:"created"`
+// 		OwnedBy string `json:"owned_by"`
+// 		Meta    struct {
+// 			VocabType int   `json:"vocab_type"`
+// 			NVocab    int   `json:"n_vocab"`
+// 			NCtxTrain int   `json:"n_ctx_train"`
+// 			NEmbd     int   `json:"n_embd"`
+// 			NParams   int64 `json:"n_params"`
+// 			Size      int64 `json:"size"`
+// 		} `json:"meta"`
+// 	} `json:"data"`
+// }

 type LlamaCPPReq struct {
+	Model  string `json:"model"`
 	Stream bool   `json:"stream"`
 	// For multimodal requests, prompt should be an object with prompt_string and multimodal_data
 	// For regular requests, prompt is a string
@@ -474,9 +475,8 @@ type PromptObject struct {
 	ImageData []string `json:"image_data,omitempty"` // For compatibility
 }

-func NewLCPReq(prompt string, multimodalData []string, props map[string]float32, stopStrings []string) LlamaCPPReq {
+func NewLCPReq(prompt, model string, multimodalData []string, props map[string]float32, stopStrings []string) LlamaCPPReq {
 	var finalPrompt interface{}

 	if len(multimodalData) > 0 {
 		// When multimodal data is present, use the object format as per Python example:
 		// { "prompt": { "prompt_string": "...", "multimodal_data": [...] } }
@@ -489,8 +489,8 @@ func NewLCPReq(prompt string, multimodalData []string, props map[string]float32,
 		// When no multimodal data, use plain string
 		finalPrompt = prompt
 	}

 	return LlamaCPPReq{
+		Model:       model,
 		Stream:      true,
 		Prompt:      finalPrompt,
 		Temperature: props["temperature"],
@@ -505,3 +505,27 @@ type LlamaCPPResp struct {
 	Content string `json:"content"`
 	Stop    bool   `json:"stop"`
 }
+
+type LCPModels struct {
+	Data []struct {
+		ID      string `json:"id"`
+		Object  string `json:"object"`
+		OwnedBy string `json:"owned_by"`
+		Created int    `json:"created"`
+		InCache bool   `json:"in_cache"`
+		Path    string `json:"path"`
+		Status  struct {
+			Value string   `json:"value"`
+			Args  []string `json:"args"`
+		} `json:"status"`
+	} `json:"data"`
+	Object string `json:"object"`
+}
+
+func (lcp *LCPModels) ListModels() []string {
+	resp := []string{}
+	for _, model := range lcp.Data {
+		resp = append(resp, model.ID)
+	}
+	return resp
+}
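
Note: the prompt-shape branch inside NewLCPReq can be shown standalone. This sketch follows the object format the comments above describe; buildPrompt is a hypothetical helper, not a function from this repo.

package main

import (
	"encoding/json"
	"fmt"
)

// buildPrompt mirrors the branch inside NewLCPReq: object form with
// prompt_string/multimodal_data when attachments exist, plain string otherwise.
func buildPrompt(prompt string, multimodalData []string) interface{} {
	if len(multimodalData) > 0 {
		return map[string]interface{}{
			"prompt_string":   prompt,
			"multimodal_data": multimodalData,
		}
	}
	return prompt
}

func main() {
	// These marshal the value that lands in LlamaCPPReq.Prompt.
	plain, _ := json.Marshal(buildPrompt("hi", nil))
	multi, _ := json.Marshal(buildPrompt("hi", []string{"<base64-image>"}))
	fmt.Println(string(plain)) // "hi"
	fmt.Println(string(multi)) // {"multimodal_data":["<base64-image>"],"prompt_string":"hi"}
}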

tui.go | 13

@@ -961,11 +961,16 @@ func init() {
 			}
 			updateStatusLine()
 		} else {
-			// For non-OpenRouter APIs, use the old logic
 			go func() {
 				fetchLCPModelName() // blocks
+				if len(LocalModels) > 0 {
+					currentLocalModelIndex = (currentLocalModelIndex + 1) % len(LocalModels)
+					chatBody.Model = LocalModels[currentLocalModelIndex]
+				}
 				updateStatusLine()
 			}()
+			// // For non-OpenRouter APIs, use the old logic
+			// go func() {
+			// 	fetchLCPModelName() // blocks
+			// 	updateStatusLine()
+			// }()
 		}
 		return nil
 	}

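Note: the switch itself is plain round-robin indexing over LocalModels; a self-contained sketch of the arithmetic used in the goroutine above, with made-up model names.

package main

import "fmt"

func main() {
	localModels := []string{"model-a.gguf", "model-b.gguf", "model-c.gguf"}
	idx := 0
	for i := 0; i < 4; i++ {
		idx = (idx + 1) % len(localModels) // wraps back to 0 after the last model
		fmt.Println(localModels[idx])      // model-b, model-c, model-a, model-b
	}
}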