Feat: llamacpp /completion attempt

This commit is contained in:
Grail Finder
2025-01-22 20:17:49 +03:00
parent c41ff09b2f
commit 75f51c1a19
4 changed files with 176 additions and 86 deletions

View File

@@ -40,6 +40,7 @@
- consider adding use /completion of llamacpp, since openai endpoint clearly has template|format issues;
- change temp, min-p and other params from tui;
- DRY;
- keybind to switch between openai and llamacpp endpoints;
### FIX:
- bot responding (or hanging) blocks everything;
@@ -63,3 +64,4 @@
- number of sentences in a batch should depend on number of words there.
- F1 can load any chat, by loading chat of other agent it does not switch agents, if that chat is continued, it will rewrite agent in db; (either allow only chats from current agent OR switch agent on chat loading);
- after chat is deleted: load undeleted chat;
- name split for llamacpp completion. user msg should end with 'bot_name:';

145
bot.go
View File

@@ -2,7 +2,6 @@ package main
import ( import (
"bufio" "bufio"
"bytes"
"elefant/config" "elefant/config"
"elefant/models" "elefant/models"
"elefant/rag" "elefant/rag"
@@ -37,32 +36,38 @@ var (
interruptResp = false interruptResp = false
ragger *rag.RAG ragger *rag.RAG
currentModel = "none" currentModel = "none"
chunkParser ChunkParser
defaultLCPProps = map[string]float32{
"temperature": 0.8,
"dry_multiplier": 0.6,
}
) )
// ==== // ====
func formMsg(chatBody *models.ChatBody, newMsg, role string) io.Reader { // DEPRECATED
if newMsg != "" { // otherwise let the bot continue // func formMsg(chatBody *models.ChatBody, newMsg, role string) io.Reader {
newMsg := models.RoleMsg{Role: role, Content: newMsg} // if newMsg != "" { // otherwise let the bot continue
chatBody.Messages = append(chatBody.Messages, newMsg) // newMsg := models.RoleMsg{Role: role, Content: newMsg}
// if rag // chatBody.Messages = append(chatBody.Messages, newMsg)
if cfg.RAGEnabled { // // if rag
ragResp, err := chatRagUse(newMsg.Content) // if cfg.RAGEnabled {
if err != nil { // ragResp, err := chatRagUse(newMsg.Content)
logger.Error("failed to form a rag msg", "error", err) // if err != nil {
return nil // logger.Error("failed to form a rag msg", "error", err)
} // return nil
ragMsg := models.RoleMsg{Role: cfg.ToolRole, Content: ragResp} // }
chatBody.Messages = append(chatBody.Messages, ragMsg) // ragMsg := models.RoleMsg{Role: cfg.ToolRole, Content: ragResp}
} // chatBody.Messages = append(chatBody.Messages, ragMsg)
} // }
data, err := json.Marshal(chatBody) // }
if err != nil { // data, err := json.Marshal(chatBody)
logger.Error("failed to form a msg", "error", err) // if err != nil {
return nil // logger.Error("failed to form a msg", "error", err)
} // return nil
return bytes.NewReader(data) // }
} // return bytes.NewReader(data)
// }
func fetchModelName() { func fetchModelName() {
api := "http://localhost:8080/v1/models" api := "http://localhost:8080/v1/models"
@@ -85,26 +90,26 @@ func fetchModelName() {
updateStatusLine() updateStatusLine()
} }
func fetchProps() { // func fetchProps() {
api := "http://localhost:8080/props" // api := "http://localhost:8080/props"
resp, err := httpClient.Get(api) // resp, err := httpClient.Get(api)
if err != nil { // if err != nil {
logger.Warn("failed to get model", "link", api, "error", err) // logger.Warn("failed to get model", "link", api, "error", err)
return // return
} // }
defer resp.Body.Close() // defer resp.Body.Close()
llmModel := models.LLMModels{} // llmModel := models.LLMModels{}
if err := json.NewDecoder(resp.Body).Decode(&llmModel); err != nil { // if err := json.NewDecoder(resp.Body).Decode(&llmModel); err != nil {
logger.Warn("failed to decode resp", "link", api, "error", err) // logger.Warn("failed to decode resp", "link", api, "error", err)
return // return
} // }
if resp.StatusCode != 200 { // if resp.StatusCode != 200 {
currentModel = "none" // currentModel = "none"
return // return
} // }
currentModel = path.Base(llmModel.Data[0].ID) // currentModel = path.Base(llmModel.Data[0].ID)
updateStatusLine() // updateStatusLine()
} // }
// func sendMsgToLLM(body io.Reader) (*models.LLMRespChunk, error) { // func sendMsgToLLM(body io.Reader) (*models.LLMRespChunk, error) {
func sendMsgToLLM(body io.Reader) { func sendMsgToLLM(body io.Reader) {
@@ -116,7 +121,6 @@ func sendMsgToLLM(body io.Reader) {
return return
} }
defer resp.Body.Close() defer resp.Body.Close()
// llmResp := []models.LLMRespChunk{}
reader := bufio.NewReader(resp.Body) reader := bufio.NewReader(resp.Body)
counter := uint32(0) counter := uint32(0)
for { for {
@@ -131,10 +135,13 @@ func sendMsgToLLM(body io.Reader) {
streamDone <- true streamDone <- true
break break
} }
llmchunk := models.LLMRespChunk{}
line, err := reader.ReadBytes('\n') line, err := reader.ReadBytes('\n')
if err != nil { if err != nil {
logger.Error("error reading response body", "error", err) logger.Error("error reading response body", "error", err, "line", string(line))
if err.Error() != "EOF" {
streamDone <- true
break
}
continue continue
} }
if len(line) <= 1 { if len(line) <= 1 {
@@ -142,24 +149,24 @@ func sendMsgToLLM(body io.Reader) {
} }
// starts with -> data: // starts with -> data:
line = line[6:] line = line[6:]
if err := json.Unmarshal(line, &llmchunk); err != nil { content, stop, err := chunkParser.ParseChunk(line)
logger.Error("failed to decode", "error", err, "line", string(line)) if err != nil {
logger.Error("error parsing response body", "error", err, "line", string(line), "url", cfg.APIURL)
streamDone <- true streamDone <- true
return
}
// llmResp = append(llmResp, llmchunk)
// logger.Info("streamview", "chunk", llmchunk)
// if llmchunk.Choices[len(llmchunk.Choices)-1].FinishReason != "chat.completion.chunk" {
if llmchunk.Choices[len(llmchunk.Choices)-1].FinishReason == "stop" {
if llmchunk.Choices[len(llmchunk.Choices)-1].Delta.Content != "" {
logger.Warn("text inside of finish llmchunk", "chunk", llmchunk, "counter", counter)
}
streamDone <- true
// last chunk
break break
} }
if stop {
if content != "" {
logger.Warn("text inside of finish llmchunk", "chunk", content, "counter", counter)
}
streamDone <- true
break
}
if counter == 0 {
content = strings.TrimPrefix(content, " ")
}
// bot sends way too many \n // bot sends way too many \n
answerText := strings.ReplaceAll(llmchunk.Choices[0].Delta.Content, "\n\n", "\n") answerText := strings.ReplaceAll(content, "\n\n", "\n")
chunkChan <- answerText chunkChan <- answerText
} }
} }
@@ -203,9 +210,10 @@ func chatRagUse(qText string) (string, error) {
func chatRound(userMsg, role string, tv *tview.TextView, regen bool) { func chatRound(userMsg, role string, tv *tview.TextView, regen bool) {
botRespMode = true botRespMode = true
reader := formMsg(chatBody, userMsg, role) // reader := formMsg(chatBody, userMsg, role)
if reader == nil { reader, err := chunkParser.FormMsg(userMsg, role)
logger.Error("empty reader from msgs", "role", role) if reader == nil || err != nil {
logger.Error("empty reader from msgs", "role", role, "error", err)
return return
} }
go sendMsgToLLM(reader) go sendMsgToLLM(reader)
@@ -238,8 +246,7 @@ out:
// bot msg is done; // bot msg is done;
// now check it for func call // now check it for func call
// logChat(activeChatName, chatBody.Messages) // logChat(activeChatName, chatBody.Messages)
err := updateStorageChat(activeChatName, chatBody.Messages) if err := updateStorageChat(activeChatName, chatBody.Messages); err != nil {
if err != nil {
logger.Warn("failed to update storage", "error", err, "name", activeChatName) logger.Warn("failed to update storage", "error", err, "name", activeChatName)
} }
findCall(respText.String(), tv) findCall(respText.String(), tv)
@@ -328,8 +335,8 @@ func charToStart(agentName string) bool {
func runModelNameTicker(n time.Duration) { func runModelNameTicker(n time.Duration) {
ticker := time.NewTicker(n) ticker := time.NewTicker(n)
for { for {
<-ticker.C
fetchModelName() fetchModelName()
<-ticker.C
} }
} }
@@ -339,7 +346,8 @@ func init() {
{Role: "system", Content: basicSysMsg}, {Role: "system", Content: basicSysMsg},
{Role: cfg.AssistantRole, Content: defaultFirstMsg}, {Role: cfg.AssistantRole, Content: defaultFirstMsg},
} }
logfile, err := os.OpenFile(cfg.LogFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) logfile, err := os.OpenFile(cfg.LogFile,
os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil { if err != nil {
logger.Error("failed to open log file", "error", err, "filename", cfg.LogFile) logger.Error("failed to open log file", "error", err, "filename", cfg.LogFile)
return return
@@ -372,6 +380,7 @@ func init() {
Stream: true, Stream: true,
Messages: lastChat, Messages: lastChat,
} }
go runModelNameTicker(time.Second * 20) initChunkParser()
go runModelNameTicker(time.Second * 120)
// tempLoad() // tempLoad()
} }

View File

@@ -57,28 +57,33 @@ type RoleMsg struct {
} }
func (m RoleMsg) ToText(i int, cfg *config.Config) string { func (m RoleMsg) ToText(i int, cfg *config.Config) string {
icon := "" icon := fmt.Sprintf("(%d)", i)
switch m.Role { if !strings.HasPrefix(m.Content, cfg.UserRole+":") && !strings.HasPrefix(m.Content, cfg.AssistantRole+":") {
case "assistant": switch m.Role {
icon = fmt.Sprintf("(%d) %s", i, cfg.AssistantIcon) case "assistant":
case "user": icon = fmt.Sprintf("(%d) %s", i, cfg.AssistantIcon)
icon = fmt.Sprintf("(%d) %s", i, cfg.UserIcon) case "user":
case "system": icon = fmt.Sprintf("(%d) %s", i, cfg.UserIcon)
icon = fmt.Sprintf("(%d) <system>: ", i) case "system":
case "tool": icon = fmt.Sprintf("(%d) <system>: ", i)
icon = fmt.Sprintf("(%d) %s", i, cfg.ToolIcon) case "tool":
default: icon = fmt.Sprintf("(%d) %s", i, cfg.ToolIcon)
icon = fmt.Sprintf("(%d) <%s>: ", i, m.Role) default:
icon = fmt.Sprintf("(%d) <%s>: ", i, m.Role)
}
} }
textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, m.Content) textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, m.Content)
return strings.ReplaceAll(textMsg, "\n\n", "\n") return strings.ReplaceAll(textMsg, "\n\n", "\n")
} }
// ToPrompt renders the message as a raw completion-prompt line in the
// form "role:\ncontent", collapsing double newlines to single ones
// (used for llama.cpp's /completion endpoint instead of chat messages).
func (m RoleMsg) ToPrompt() string {
	prompt := m.Role + ":\n" + m.Content
	return strings.ReplaceAll(prompt, "\n\n", "\n")
}
type ChatBody struct { type ChatBody struct {
Model string `json:"model"` Model string `json:"model"`
Stream bool `json:"stream"` Stream bool `json:"stream"`
Messages []RoleMsg `json:"messages"` Messages []RoleMsg `json:"messages"`
DRYMultiplier float32 `json:"frequency_penalty"`
} }
type ChatToolsBody struct { type ChatToolsBody struct {
@@ -144,3 +149,45 @@ type LLMModels struct {
} `json:"meta"` } `json:"meta"`
} `json:"data"` } `json:"data"`
} }
// LlamaCPPReq is the request body for llama.cpp's native /completion
// endpoint (as opposed to the OpenAI-compatible chat endpoint). Only the
// fields actually sent today are active; the commented-out fields are the
// remaining sampler knobs llama.cpp accepts, kept here for reference.
type LlamaCPPReq struct {
	// Stream requests server-sent-event chunks instead of one response.
	Stream bool `json:"stream"`
	// Messages []RoleMsg `json:"messages"`
	// Prompt is the raw, already-templated prompt text (roles are baked
	// into the string by the caller, e.g. via RoleMsg.ToPrompt).
	Prompt string `json:"prompt"`
	Temperature float32 `json:"temperature"`
	DryMultiplier float32 `json:"dry_multiplier"`
	// Stop lists strings that terminate generation when the model emits
	// them (e.g. "user:\n" so the bot does not speak for the user).
	Stop []string `json:"stop"`
	// MaxTokens int `json:"max_tokens"`
	// DryBase float64 `json:"dry_base"`
	// DryAllowedLength int `json:"dry_allowed_length"`
	// DryPenaltyLastN int `json:"dry_penalty_last_n"`
	// CachePrompt bool `json:"cache_prompt"`
	// DynatempRange int `json:"dynatemp_range"`
	// DynatempExponent int `json:"dynatemp_exponent"`
	// TopK int `json:"top_k"`
	// TopP float32 `json:"top_p"`
	// MinP float32 `json:"min_p"`
	// TypicalP int `json:"typical_p"`
	// XtcProbability int `json:"xtc_probability"`
	// XtcThreshold float32 `json:"xtc_threshold"`
	// RepeatLastN int `json:"repeat_last_n"`
	// RepeatPenalty int `json:"repeat_penalty"`
	// PresencePenalty int `json:"presence_penalty"`
	// FrequencyPenalty int `json:"frequency_penalty"`
	// Samplers string `json:"samplers"`
}
// NewLCPReq builds a streaming llama.cpp /completion request for the
// given prompt. The stop list cuts generation as soon as the model tries
// to speak for the given role (plus the chatml end-of-turn token).
// NOTE(review): DryMultiplier is hard-coded to 0.5 here while bot.go's
// defaultLCPProps uses 0.6 — confirm which value is intended, and whether
// these should come from the props map edited in the TUI.
func NewLCPReq(prompt, role string) LlamaCPPReq {
	stopWords := []string{role + ":\n", "<|im_end|>"}
	req := LlamaCPPReq{
		Stream:        true,
		Prompt:        prompt,
		Temperature:   0.8,
		DryMultiplier: 0.5,
		Stop:          stopWords,
	}
	return req
}
// LlamaCPPResp is one streamed chunk from llama.cpp's /completion
// endpoint: a piece of generated text plus a flag set on the final chunk.
type LlamaCPPResp struct {
	Content string `json:"content"`
	// Stop is true on the last chunk of the stream.
	Stop bool `json:"stop"`
}

36
tui.go
View File

@@ -37,6 +37,7 @@ var (
renamePage = "renamePage" renamePage = "renamePage"
RAGPage = "RAGPage " RAGPage = "RAGPage "
longStatusPage = "longStatusPage" longStatusPage = "longStatusPage"
propsPage = "propsPage"
// help text // help text
helpText = ` helpText = `
[yellow]Esc[white]: send msg [yellow]Esc[white]: send msg
@@ -129,6 +130,36 @@ func startNewChat() {
colorText() colorText()
} }
// makePropsForm builds a modal form for editing llama.cpp /completion
// sampling properties (temperature, dry_multiplier, ...). "Save" parses
// every field as float32 and writes it back into the props map in place
// (unparseable fields are logged and skipped); both buttons close the
// page. NOTE: map iteration order is random, so field order may differ
// between openings.
func makePropsForm(props map[string]float32) *tview.Form {
	form := tview.NewForm().
		AddTextView("Notes", "Props for llamacpp completion call", 40, 2, true, false).
		AddButton("Quit", func() {
			pages.RemovePage(propsPage)
		})
	form.AddButton("Save", func() {
		defer pages.RemovePage(propsPage)
		for pn := range props {
			propField, ok := form.GetFormItemByLabel(pn).(*tview.InputField)
			if !ok {
				logger.Warn("failed to convert to inputfield", "prop_name", pn)
				continue
			}
			// ParseFloat with bitSize 32 so the value round-trips into float32.
			val, err := strconv.ParseFloat(propField.GetText(), 32)
			if err != nil {
				logger.Warn("failed parse to float", "value", propField.GetText())
				continue
			}
			props[pn] = float32(val)
		}
	})
	for propName, value := range props {
		form.AddInputField(propName, fmt.Sprintf("%v", value), 20, tview.InputFieldFloat, nil)
	}
	form.SetBorder(true).SetTitle("Llamacpp completion props").SetTitleAlign(tview.AlignLeft)
	return form
}
func init() { func init() {
theme := tview.Theme{ theme := tview.Theme{
PrimitiveBackgroundColor: tcell.ColorDefault, PrimitiveBackgroundColor: tcell.ColorDefault,
@@ -420,8 +451,9 @@ func init() {
} }
return nil return nil
} }
if event.Key() == tcell.KeyCtrlA { if event.Key() == tcell.KeyCtrlP {
textArea.SetText("pressed ctrl+a", true) propsForm := makePropsForm(defaultLCPProps)
pages.AddPage(propsPage, propsForm, true, true)
return nil return nil
} }
if event.Key() == tcell.KeyCtrlN { if event.Key() == tcell.KeyCtrlN {