Feat: llamacpp /completion attempt

2025-01-22 20:17:49 +03:00
parent c41ff09b2f
commit 75f51c1a19
4 changed files with 176 additions and 86 deletions
--- a/README.md
+++ b/README.md
@@ -40,6 +40,7 @@
 - consider adding use /completion of llamacpp, since openai endpoint clearly has template|format issues;
 - change temp, min-p and other params from tui;
 - DRY;
+- keybind to switch between openai and llamacpp endpoints;

 ### FIX:
 - bot responding (or hanging) blocks everything; +
@@ -63,3 +64,4 @@
 - number of sentences in a batch should depend on number of words there. +
 - F1 can load any chat, by loading chat of other agent it does not switch agents, if that chat is continued, it will rewrite agent in db; (either allow only chats from current agent OR switch agent on chat loading); +
 - after chat is deleted: load undeleted chat; +
+- name split for llamacpp completion. user msg should end with 'bot_name:';
--- a/bot.go
+++ b/bot.go
@@ -2,7 +2,6 @@ package main

 import (
 	"bufio"
-	"bytes"
 	"elefant/config"
 	"elefant/models"
 	"elefant/rag"
@@ -37,32 +36,38 @@ var (
 	interruptResp       = false
 	ragger              *rag.RAG
 	currentModel        = "none"
+	chunkParser         ChunkParser
+	defaultLCPProps     = map[string]float32{
+		"temperature":    0.8,
+		"dry_multiplier": 0.6,
+	}
 )

 // ====

-func formMsg(chatBody *models.ChatBody, newMsg, role string) io.Reader {
-	if newMsg != "" { // otherwise let the bot continue
-		newMsg := models.RoleMsg{Role: role, Content: newMsg}
-		chatBody.Messages = append(chatBody.Messages, newMsg)
-		// if rag
-		if cfg.RAGEnabled {
-			ragResp, err := chatRagUse(newMsg.Content)
-			if err != nil {
-				logger.Error("failed to form a rag msg", "error", err)
-				return nil
-			}
-			ragMsg := models.RoleMsg{Role: cfg.ToolRole, Content: ragResp}
-			chatBody.Messages = append(chatBody.Messages, ragMsg)
-		}
-	}
-	data, err := json.Marshal(chatBody)
-	if err != nil {
-		logger.Error("failed to form a msg", "error", err)
-		return nil
-	}
-	return bytes.NewReader(data)
-}
+// DEPRECATED
+// func formMsg(chatBody *models.ChatBody, newMsg, role string) io.Reader {
+// 	if newMsg != "" { // otherwise let the bot continue
+// 		newMsg := models.RoleMsg{Role: role, Content: newMsg}
+// 		chatBody.Messages = append(chatBody.Messages, newMsg)
+// 		// if rag
+// 		if cfg.RAGEnabled {
+// 			ragResp, err := chatRagUse(newMsg.Content)
+// 			if err != nil {
+// 				logger.Error("failed to form a rag msg", "error", err)
+// 				return nil
+// 			}
+// 			ragMsg := models.RoleMsg{Role: cfg.ToolRole, Content: ragResp}
+// 			chatBody.Messages = append(chatBody.Messages, ragMsg)
+// 		}
+// 	}
+// 	data, err := json.Marshal(chatBody)
+// 	if err != nil {
+// 		logger.Error("failed to form a msg", "error", err)
+// 		return nil
+// 	}
+// 	return bytes.NewReader(data)
+// }

 func fetchModelName() {
 	api := "http://localhost:8080/v1/models"
@@ -85,26 +90,26 @@ func fetchModelName() {
 	updateStatusLine()
 }

-func fetchProps() {
-	api := "http://localhost:8080/props"
-	resp, err := httpClient.Get(api)
-	if err != nil {
-		logger.Warn("failed to get model", "link", api, "error", err)
-		return
-	}
-	defer resp.Body.Close()
-	llmModel := models.LLMModels{}
-	if err := json.NewDecoder(resp.Body).Decode(&llmModel); err != nil {
-		logger.Warn("failed to decode resp", "link", api, "error", err)
-		return
-	}
-	if resp.StatusCode != 200 {
-		currentModel = "none"
-		return
-	}
-	currentModel = path.Base(llmModel.Data[0].ID)
-	updateStatusLine()
-}
+// func fetchProps() {
+// 	api := "http://localhost:8080/props"
+// 	resp, err := httpClient.Get(api)
+// 	if err != nil {
+// 		logger.Warn("failed to get model", "link", api, "error", err)
+// 		return
+// 	}
+// 	defer resp.Body.Close()
+// 	llmModel := models.LLMModels{}
+// 	if err := json.NewDecoder(resp.Body).Decode(&llmModel); err != nil {
+// 		logger.Warn("failed to decode resp", "link", api, "error", err)
+// 		return
+// 	}
+// 	if resp.StatusCode != 200 {
+// 		currentModel = "none"
+// 		return
+// 	}
+// 	currentModel = path.Base(llmModel.Data[0].ID)
+// 	updateStatusLine()
+// }

 // func sendMsgToLLM(body io.Reader) (*models.LLMRespChunk, error) {
 func sendMsgToLLM(body io.Reader) {
@@ -116,7 +121,6 @@ func sendMsgToLLM(body io.Reader) {
 		return
 	}
 	defer resp.Body.Close()
-	// llmResp := []models.LLMRespChunk{}
 	reader := bufio.NewReader(resp.Body)
 	counter := uint32(0)
 	for {
@@ -131,10 +135,13 @@ func sendMsgToLLM(body io.Reader) {
 			streamDone <- true
 			break
 		}
-		llmchunk := models.LLMRespChunk{}
 		line, err := reader.ReadBytes('\n')
 		if err != nil {
-			logger.Error("error reading response body", "error", err)
+			logger.Error("error reading response body", "error", err, "line", string(line))
+			if err.Error() != "EOF" {
+				streamDone <- true
+				break
+			}
 			continue
 		}
 		if len(line) <= 1 {
@@ -142,24 +149,24 @@ func sendMsgToLLM(body io.Reader) {
 		}
 		// starts with -> data:
 		line = line[6:]
-		if err := json.Unmarshal(line, &llmchunk); err != nil {
-			logger.Error("failed to decode", "error", err, "line", string(line))
+		content, stop, err := chunkParser.ParseChunk(line)
+		if err != nil {
+			logger.Error("error parsing response body", "error", err, "line", string(line), "url", cfg.APIURL)
 			streamDone <- true
-			return
-		}
-		// llmResp = append(llmResp, llmchunk)
-		// logger.Info("streamview", "chunk", llmchunk)
-		// if llmchunk.Choices[len(llmchunk.Choices)-1].FinishReason != "chat.completion.chunk" {
-		if llmchunk.Choices[len(llmchunk.Choices)-1].FinishReason == "stop" {
-			if llmchunk.Choices[len(llmchunk.Choices)-1].Delta.Content != "" {
-				logger.Warn("text inside of finish llmchunk", "chunk", llmchunk, "counter", counter)
-			}
-			streamDone <- true
-			// last chunk
 			break
 		}
+		if stop {
+			if content != "" {
+				logger.Warn("text inside of finish llmchunk", "chunk", content, "counter", counter)
+			}
+			streamDone <- true
+			break
+		}
+		if counter == 0 {
+			content = strings.TrimPrefix(content, " ")
+		}
 		// bot sends way too many \n
-		answerText := strings.ReplaceAll(llmchunk.Choices[0].Delta.Content, "\n\n", "\n")
+		answerText := strings.ReplaceAll(content, "\n\n", "\n")
 		chunkChan <- answerText
 	}
 }
@@ -203,9 +210,10 @@ func chatRagUse(qText string) (string, error) {

 func chatRound(userMsg, role string, tv *tview.TextView, regen bool) {
 	botRespMode = true
-	reader := formMsg(chatBody, userMsg, role)
-	if reader == nil {
-		logger.Error("empty reader from msgs", "role", role)
+	// reader := formMsg(chatBody, userMsg, role)
+	reader, err := chunkParser.FormMsg(userMsg, role)
+	if reader == nil || err != nil {
+		logger.Error("empty reader from msgs", "role", role, "error", err)
 		return
 	}
 	go sendMsgToLLM(reader)
@@ -238,8 +246,7 @@ out:
 	// bot msg is done;
 	// now check it for func call
 	// logChat(activeChatName, chatBody.Messages)
-	err := updateStorageChat(activeChatName, chatBody.Messages)
-	if err != nil {
+	if err := updateStorageChat(activeChatName, chatBody.Messages); err != nil {
 		logger.Warn("failed to update storage", "error", err, "name", activeChatName)
 	}
 	findCall(respText.String(), tv)
@@ -328,8 +335,8 @@ func charToStart(agentName string) bool {
 func runModelNameTicker(n time.Duration) {
 	ticker := time.NewTicker(n)
 	for {
-		<-ticker.C
 		fetchModelName()
+		<-ticker.C
 	}
 }

@@ -339,7 +346,8 @@ func init() {
 		{Role: "system", Content: basicSysMsg},
 		{Role: cfg.AssistantRole, Content: defaultFirstMsg},
 	}
-	logfile, err := os.OpenFile(cfg.LogFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
+	logfile, err := os.OpenFile(cfg.LogFile,
+		os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
 	if err != nil {
 		logger.Error("failed to open log file", "error", err, "filename", cfg.LogFile)
 		return
@@ -372,6 +380,7 @@ func init() {
 		Stream:   true,
 		Messages: lastChat,
 	}
-	go runModelNameTicker(time.Second * 20)
+	initChunkParser()
+	go runModelNameTicker(time.Second * 120)
 	// tempLoad()
 }
--- a/models/models.go
+++ b/models/models.go
@@ -57,28 +57,33 @@ type RoleMsg struct {
 }

 func (m RoleMsg) ToText(i int, cfg *config.Config) string {
-	icon := ""
-	switch m.Role {
-	case "assistant":
-		icon = fmt.Sprintf("(%d) %s", i, cfg.AssistantIcon)
-	case "user":
-		icon = fmt.Sprintf("(%d) %s", i, cfg.UserIcon)
-	case "system":
-		icon = fmt.Sprintf("(%d) <system>: ", i)
-	case "tool":
-		icon = fmt.Sprintf("(%d) %s", i, cfg.ToolIcon)
-	default:
-		icon = fmt.Sprintf("(%d) <%s>: ", i, m.Role)
+	icon := fmt.Sprintf("(%d)", i)
+	if !strings.HasPrefix(m.Content, cfg.UserRole+":") && !strings.HasPrefix(m.Content, cfg.AssistantRole+":") {
+		switch m.Role {
+		case "assistant":
+			icon = fmt.Sprintf("(%d) %s", i, cfg.AssistantIcon)
+		case "user":
+			icon = fmt.Sprintf("(%d) %s", i, cfg.UserIcon)
+		case "system":
+			icon = fmt.Sprintf("(%d) <system>: ", i)
+		case "tool":
+			icon = fmt.Sprintf("(%d) %s", i, cfg.ToolIcon)
+		default:
+			icon = fmt.Sprintf("(%d) <%s>: ", i, m.Role)
+		}
 	}
 	textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, m.Content)
 	return strings.ReplaceAll(textMsg, "\n\n", "\n")
 }

+func (m RoleMsg) ToPrompt() string {
+	return strings.ReplaceAll(fmt.Sprintf("%s:\n%s", m.Role, m.Content), "\n\n", "\n")
+}
+
 type ChatBody struct {
-	Model         string    `json:"model"`
-	Stream        bool      `json:"stream"`
-	Messages      []RoleMsg `json:"messages"`
-	DRYMultiplier float32   `json:"frequency_penalty"`
+	Model    string    `json:"model"`
+	Stream   bool      `json:"stream"`
+	Messages []RoleMsg `json:"messages"`
 }

 type ChatToolsBody struct {
@@ -144,3 +149,45 @@ type LLMModels struct {
 		} `json:"meta"`
 	} `json:"data"`
 }
+
+type LlamaCPPReq struct {
+	Stream bool `json:"stream"`
+	// Messages      []RoleMsg `json:"messages"`
+	Prompt        string   `json:"prompt"`
+	Temperature   float32  `json:"temperature"`
+	DryMultiplier float32  `json:"dry_multiplier"`
+	Stop          []string `json:"stop"`
+	// MaxTokens        int     `json:"max_tokens"`
+	// DryBase          float64 `json:"dry_base"`
+	// DryAllowedLength int     `json:"dry_allowed_length"`
+	// DryPenaltyLastN  int     `json:"dry_penalty_last_n"`
+	// CachePrompt      bool    `json:"cache_prompt"`
+	// DynatempRange    int     `json:"dynatemp_range"`
+	// DynatempExponent int     `json:"dynatemp_exponent"`
+	// TopK             int     `json:"top_k"`
+	// TopP             float32 `json:"top_p"`
+	// MinP             float32 `json:"min_p"`
+	// TypicalP         int     `json:"typical_p"`
+	// XtcProbability   int     `json:"xtc_probability"`
+	// XtcThreshold     float32 `json:"xtc_threshold"`
+	// RepeatLastN      int     `json:"repeat_last_n"`
+	// RepeatPenalty    int     `json:"repeat_penalty"`
+	// PresencePenalty  int     `json:"presence_penalty"`
+	// FrequencyPenalty int     `json:"frequency_penalty"`
+	// Samplers         string  `json:"samplers"`
+}
+
+func NewLCPReq(prompt, role string) LlamaCPPReq {
+	return LlamaCPPReq{
+		Stream:        true,
+		Prompt:        prompt,
+		Temperature:   0.8,
+		DryMultiplier: 0.5,
+		Stop:          []string{role + ":\n", "<|im_end|>"},
+	}
+}
+
+type LlamaCPPResp struct {
+	Content string `json:"content"`
+	Stop    bool   `json:"stop"`
+}
--- a/tui.go
+++ b/tui.go
@@ -37,6 +37,7 @@ var (
 	renamePage     = "renamePage"
 	RAGPage        = "RAGPage "
 	longStatusPage = "longStatusPage"
+	propsPage      = "propsPage"
 	// help text
 	helpText = `
 [yellow]Esc[white]: send msg
@@ -129,6 +130,36 @@ func startNewChat() {
 	colorText()
 }

+func makePropsForm(props map[string]float32) *tview.Form {
+	form := tview.NewForm().
+		AddTextView("Notes", "Props for llamacpp completion call", 40, 2, true, false).
+		AddCheckbox("Age 18+", false, nil).
+		AddButton("Quit", func() {
+			pages.RemovePage(propsPage)
+		})
+	form.AddButton("Save", func() {
+		defer pages.RemovePage(propsPage)
+		for pn := range props {
+			propField, ok := form.GetFormItemByLabel(pn).(*tview.InputField)
+			if !ok {
+				logger.Warn("failed to convert to inputfield", "prop_name", pn)
+				continue
+			}
+			val, err := strconv.ParseFloat(propField.GetText(), 32)
+			if err != nil {
+				logger.Warn("failed parse to float", "value", propField.GetText())
+				continue
+			}
+			props[pn] = float32(val)
+		}
+	})
+	for propName, value := range props {
+		form.AddInputField(propName, fmt.Sprintf("%v", value), 20, tview.InputFieldFloat, nil)
+	}
+	form.SetBorder(true).SetTitle("Enter some data").SetTitleAlign(tview.AlignLeft)
+	return form
+}
+
 func init() {
 	theme := tview.Theme{
 		PrimitiveBackgroundColor:    tcell.ColorDefault,
@@ -420,8 +451,9 @@ func init() {
 			}
 			return nil
 		}
-		if event.Key() == tcell.KeyCtrlA {
-			textArea.SetText("pressed ctrl+a", true)
+		if event.Key() == tcell.KeyCtrlP {
+			propsForm := makePropsForm(defaultLCPProps)
+			pages.AddPage(propsPage, propsForm, true, true)
 			return nil
 		}
 		if event.Key() == tcell.KeyCtrlN {