Feat: llama.cpp model switch

bot.go | 29

@@ -65,6 +65,7 @@ var (
 		"google/gemma-3-27b-it:free",
 		"meta-llama/llama-3.3-70b-instruct:free",
 	}
+	LocalModels = []string{}
 )

 // cleanNullMessages removes messages with null or empty content to prevent API issues
@@ -187,7 +188,7 @@ func createClient(connectTimeout time.Duration) *http.Client {
 	}
 }

-func fetchLCPModelName() *models.LLMModels {
+func fetchLCPModelName() *models.LCPModels {
 	//nolint
 	resp, err := httpClient.Get(cfg.FetchModelNameAPI)
 	if err != nil {
@@ -199,7 +200,7 @@ func fetchLCPModelName() *models.LLMModels {
 		return nil
 	}
 	defer resp.Body.Close()
-	llmModel := models.LLMModels{}
+	llmModel := models.LCPModels{}
 	if err := json.NewDecoder(resp.Body).Decode(&llmModel); err != nil {
 		logger.Warn("failed to decode resp", "link", cfg.FetchModelNameAPI, "error", err)
 		return nil
@@ -255,6 +256,24 @@ func fetchORModels(free bool) ([]string, error) {
 	return freeModels, nil
 }

+func fetchLCPModels() ([]string, error) {
+	resp, err := http.Get(cfg.FetchModelNameAPI)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != 200 {
+		err := fmt.Errorf("failed to fetch llama.cpp models; status: %s", resp.Status)
+		return nil, err
+	}
+	data := &models.LCPModels{}
+	if err := json.NewDecoder(resp.Body).Decode(data); err != nil {
+		return nil, err
+	}
+	localModels := data.ListModels()
+	return localModels, nil
+}
+
 func sendMsgToLLM(body io.Reader) {
 	choseChunkParser()

@@ -869,6 +888,12 @@ func init() {
 			}
 		}()
 	}
+	go func() {
+		LocalModels, err = fetchLCPModels()
+		if err != nil {
+			logger.Error("failed to fetch llama.cpp models", "error", err)
+		}
+	}()
 	choseChunkParser()
 	httpClient = createClient(time.Second * 15)
 	if cfg.TTS_ENABLED {
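
Note: for reference, a minimal sketch of the JSON shape fetchLCPModels decodes from cfg.FetchModelNameAPI, assuming a llama.cpp-style model listing. The sample response body is made up; only the fields mirrored from models.LCPModels are used.

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Hypothetical response body; field names follow models.LCPModels.
	sample := []byte(`{
		"object": "list",
		"data": [{
			"id": "gemma-3-4b-it-Q4_K_M.gguf",
			"object": "model",
			"owned_by": "llama.cpp",
			"created": 0,
			"in_cache": true,
			"path": "/models/gemma-3-4b-it-Q4_K_M.gguf",
			"status": {"value": "loaded", "args": []}
		}]
	}`)
	var list struct {
		Data []struct {
			ID string `json:"id"`
		} `json:"data"`
	}
	if err := json.Unmarshal(sample, &list); err != nil {
		panic(err)
	}
	// Equivalent of LCPModels.ListModels: collect the model IDs.
	for _, m := range list.Data {
		fmt.Println(m.ID) // gemma-3-4b-it-Q4_K_M.gguf
	}
}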

llm.go | 2

@@ -157,7 +157,7 @@ func (lcp LCPCompletion) FormMsg(msg, role string, resume bool) (io.Reader, erro

 	logger.Debug("checking prompt for /completion", "tool_use", cfg.ToolUse,
 		"msg", msg, "resume", resume, "prompt", prompt, "multimodal_data_count", len(multimodalData))
-	payload := models.NewLCPReq(prompt, multimodalData, defaultLCPProps, chatBody.MakeStopSlice())
+	payload := models.NewLCPReq(prompt, chatBody.Model, multimodalData, defaultLCPProps, chatBody.MakeStopSlice())
 	data, err := json.Marshal(payload)
 	if err != nil {
 		logger.Error("failed to form a msg", "error", err)
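
Note: a hedged sketch of the /completion payload after this change, with the model name now serialized alongside the prompt. The struct here is trimmed to the fields this diff touches, and the model name is a placeholder.

package main

import (
	"encoding/json"
	"fmt"
)

// Trimmed request mirror; the real models.LlamaCPPReq carries more fields
// (temperature, stop strings, etc.).
type req struct {
	Model  string      `json:"model"`
	Stream bool        `json:"stream"`
	Prompt interface{} `json:"prompt"`
}

func main() {
	// "gemma-3-4b-it" stands in for whatever chatBody.Model holds.
	b, _ := json.Marshal(req{Model: "gemma-3-4b-it", Stream: true, Prompt: "Hello"})
	fmt.Println(string(b)) // {"model":"gemma-3-4b-it","stream":true,"prompt":"Hello"}
}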

main.go | 1

@@ -16,6 +16,7 @@ var (
 	selectedIndex = int(-1)
 	currentAPIIndex = 0 // Index to track current API in ApiLinks slice
 	currentORModelIndex = 0 // Index to track current OpenRouter model in ORFreeModels slice
+	currentLocalModelIndex = 0 // Index to track current llama.cpp model
 	shellMode = false
 	// indexLine = "F12 to show keys help | bot resp mode: [orange:-:b]%v[-:-:-] (F6) | card's char: [orange:-:b]%s[-:-:-] (ctrl+s) | chat: [orange:-:b]%s[-:-:-] (F1) | toolUseAdviced: [orange:-:b]%v[-:-:-] (ctrl+k) | model: [orange:-:b]%s[-:-:-] (ctrl+l) | skip LLM resp: [orange:-:b]%v[-:-:-] (F10)\nAPI_URL: [orange:-:b]%s[-:-:-] (ctrl+v) | ThinkUse: [orange:-:b]%v[-:-:-] (ctrl+p) | Log Level: [orange:-:b]%v[-:-:-] (ctrl+p) | Recording: [orange:-:b]%v[-:-:-] (ctrl+r) | Writing as: [orange:-:b]%s[-:-:-] (ctrl+q)"
 	indexLineCompletion = "F12 to show keys help | bot resp mode: [orange:-:b]%v[-:-:-] (F6) | card's char: [orange:-:b]%s[-:-:-] (ctrl+s) | chat: [orange:-:b]%s[-:-:-] (F1) | toolUseAdviced: [orange:-:b]%v[-:-:-] (ctrl+k) | model: [orange:-:b]%s[-:-:-] (ctrl+l) | skip LLM resp: [orange:-:b]%v[-:-:-] (F10)\nAPI_URL: [orange:-:b]%s[-:-:-] (ctrl+v) | Insert <think>: [orange:-:b]%v[-:-:-] (ctrl+p) | Log Level: [orange:-:b]%v[-:-:-] (ctrl+p) | Recording: [orange:-:b]%v[-:-:-] (ctrl+r) | Writing as: [orange:-:b]%s[-:-:-] (ctrl+q) | Bot will write as [orange:-:b]%s[-:-:-] (ctrl+x) | role_inject [orange:-:b]%v[-:-:-]"

@@ -420,25 +420,26 @@ type OpenAIReq struct {

 // ===

-type LLMModels struct {
-	Object string `json:"object"`
-	Data   []struct {
-		ID      string `json:"id"`
-		Object  string `json:"object"`
-		Created int    `json:"created"`
-		OwnedBy string `json:"owned_by"`
-		Meta    struct {
-			VocabType int   `json:"vocab_type"`
-			NVocab    int   `json:"n_vocab"`
-			NCtxTrain int   `json:"n_ctx_train"`
-			NEmbd     int   `json:"n_embd"`
-			NParams   int64 `json:"n_params"`
-			Size      int64 `json:"size"`
-		} `json:"meta"`
-	} `json:"data"`
-}
+// type LLMModels struct {
+// 	Object string `json:"object"`
+// 	Data   []struct {
+// 		ID      string `json:"id"`
+// 		Object  string `json:"object"`
+// 		Created int    `json:"created"`
+// 		OwnedBy string `json:"owned_by"`
+// 		Meta    struct {
+// 			VocabType int   `json:"vocab_type"`
+// 			NVocab    int   `json:"n_vocab"`
+// 			NCtxTrain int   `json:"n_ctx_train"`
+// 			NEmbd     int   `json:"n_embd"`
+// 			NParams   int64 `json:"n_params"`
+// 			Size      int64 `json:"size"`
+// 		} `json:"meta"`
+// 	} `json:"data"`
+// }

 type LlamaCPPReq struct {
+	Model  string `json:"model"`
 	Stream bool   `json:"stream"`
 	// For multimodal requests, prompt should be an object with prompt_string and multimodal_data
 	// For regular requests, prompt is a string
@@ -474,9 +475,8 @@ type PromptObject struct {
 	ImageData []string `json:"image_data,omitempty"` // For compatibility
 }

-func NewLCPReq(prompt string, multimodalData []string, props map[string]float32, stopStrings []string) LlamaCPPReq {
+func NewLCPReq(prompt, model string, multimodalData []string, props map[string]float32, stopStrings []string) LlamaCPPReq {
 	var finalPrompt interface{}

 	if len(multimodalData) > 0 {
 		// When multimodal data is present, use the object format as per Python example:
 		// { "prompt": { "prompt_string": "...", "multimodal_data": [...] } }
@@ -489,8 +489,8 @@ func NewLCPReq(prompt string, multimodalData []string, props map[string]float32,
 		// When no multimodal data, use plain string
 		finalPrompt = prompt
 	}

 	return LlamaCPPReq{
+		Model:       model,
 		Stream:      true,
 		Prompt:      finalPrompt,
 		Temperature: props["temperature"],
@@ -505,3 +505,27 @@ type LlamaCPPResp struct {
 	Content string `json:"content"`
 	Stop    bool   `json:"stop"`
 }
+
+type LCPModels struct {
+	Data []struct {
+		ID      string `json:"id"`
+		Object  string `json:"object"`
+		OwnedBy string `json:"owned_by"`
+		Created int    `json:"created"`
+		InCache bool   `json:"in_cache"`
+		Path    string `json:"path"`
+		Status  struct {
+			Value string   `json:"value"`
+			Args  []string `json:"args"`
+		} `json:"status"`
+	} `json:"data"`
+	Object string `json:"object"`
+}
+
+func (lcp *LCPModels) ListModels() []string {
+	resp := []string{}
+	for _, model := range lcp.Data {
+		resp = append(resp, model.ID)
+	}
+	return resp
+}
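
Note: the prompt-shape branch inside NewLCPReq can be shown standalone. This sketch follows the object format the comments above describe; buildPrompt is a hypothetical helper, not a function from this repo.

package main

import (
	"encoding/json"
	"fmt"
)

// buildPrompt mirrors the branch inside NewLCPReq: object form with
// prompt_string/multimodal_data when attachments exist, plain string otherwise.
func buildPrompt(prompt string, multimodalData []string) interface{} {
	if len(multimodalData) > 0 {
		return map[string]interface{}{
			"prompt_string":   prompt,
			"multimodal_data": multimodalData,
		}
	}
	return prompt
}

func main() {
	// These marshal the value that lands in LlamaCPPReq.Prompt.
	plain, _ := json.Marshal(buildPrompt("hi", nil))
	multi, _ := json.Marshal(buildPrompt("hi", []string{"<base64-image>"}))
	fmt.Println(string(plain)) // "hi"
	fmt.Println(string(multi)) // {"multimodal_data":["<base64-image>"],"prompt_string":"hi"}
}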

tui.go | 13

@@ -961,11 +961,16 @@ func init() {
 			}
 			updateStatusLine()
 		} else {
-			// For non-OpenRouter APIs, use the old logic
 			go func() {
 				fetchLCPModelName() // blocks
+				if len(LocalModels) > 0 {
+					currentLocalModelIndex = (currentLocalModelIndex + 1) % len(LocalModels)
+					chatBody.Model = LocalModels[currentLocalModelIndex]
+				}
 				updateStatusLine()
 			}()
+			// // For non-OpenRouter APIs, use the old logic
+			// go func() {
+			// 	fetchLCPModelName() // blocks
+			// 	updateStatusLine()
+			// }()
 		}
 		return nil
 	}

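Note: the switch itself is plain round-robin indexing over LocalModels; a self-contained sketch of the arithmetic used in the goroutine above, with made-up model names.

package main

import "fmt"

func main() {
	localModels := []string{"model-a.gguf", "model-b.gguf", "model-c.gguf"}
	idx := 0
	for i := 0; i < 4; i++ {
		idx = (idx + 1) % len(localModels) // wraps back to 0 after the last model
		fmt.Println(localModels[idx])      // model-b, model-c, model-a, model-b
	}
}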