Feat: llamacpp /completion attempt

This commit is contained in:
Grail Finder
2025-01-22 20:17:49 +03:00
parent c41ff09b2f
commit 75f51c1a19
4 changed files with 176 additions and 86 deletions

View File

@@ -40,6 +40,7 @@
- consider adding use /completion of llamacpp, since openai endpoint clearly has template|format issues;
- change temp, min-p and other params from tui;
- DRY;
- keybind to switch between openai and llamacpp endpoints;
### FIX:
- bot responding (or hanging) blocks everything;
@@ -63,3 +64,4 @@
- number of sentences in a batch should depend on number of words there.
- F1 can load any chat, by loading chat of other agent it does not switch agents, if that chat is continued, it will rewrite agent in db; (either allow only chats from current agent OR switch agent on chat loading);
- after chat is deleted: load undeleted chat;
- name split for llamacpp completion. user msg should end with 'bot_name:';

145
bot.go
View File

@@ -2,7 +2,6 @@ package main
import ( import (
"bufio" "bufio"
"bytes"
"elefant/config" "elefant/config"
"elefant/models" "elefant/models"
"elefant/rag" "elefant/rag"
@@ -37,32 +36,38 @@ var (
interruptResp = false interruptResp = false
ragger *rag.RAG ragger *rag.RAG
currentModel = "none" currentModel = "none"
chunkParser ChunkParser
defaultLCPProps = map[string]float32{
"temperature": 0.8,
"dry_multiplier": 0.6,
}
) )
// ==== // ====
func formMsg(chatBody *models.ChatBody, newMsg, role string) io.Reader { // DEPRECATED
if newMsg != "" { // otherwise let the bot continue // func formMsg(chatBody *models.ChatBody, newMsg, role string) io.Reader {
newMsg := models.RoleMsg{Role: role, Content: newMsg} // if newMsg != "" { // otherwise let the bot continue
chatBody.Messages = append(chatBody.Messages, newMsg) // newMsg := models.RoleMsg{Role: role, Content: newMsg}
// if rag // chatBody.Messages = append(chatBody.Messages, newMsg)
if cfg.RAGEnabled { // // if rag
ragResp, err := chatRagUse(newMsg.Content) // if cfg.RAGEnabled {
if err != nil { // ragResp, err := chatRagUse(newMsg.Content)
logger.Error("failed to form a rag msg", "error", err) // if err != nil {
return nil // logger.Error("failed to form a rag msg", "error", err)
} // return nil
ragMsg := models.RoleMsg{Role: cfg.ToolRole, Content: ragResp} // }
chatBody.Messages = append(chatBody.Messages, ragMsg) // ragMsg := models.RoleMsg{Role: cfg.ToolRole, Content: ragResp}
} // chatBody.Messages = append(chatBody.Messages, ragMsg)
} // }
data, err := json.Marshal(chatBody) // }
if err != nil { // data, err := json.Marshal(chatBody)
logger.Error("failed to form a msg", "error", err) // if err != nil {
return nil // logger.Error("failed to form a msg", "error", err)
} // return nil
return bytes.NewReader(data) // }
} // return bytes.NewReader(data)
// }
func fetchModelName() { func fetchModelName() {
api := "http://localhost:8080/v1/models" api := "http://localhost:8080/v1/models"
@@ -85,26 +90,26 @@ func fetchModelName() {
updateStatusLine() updateStatusLine()
} }
func fetchProps() { // func fetchProps() {
api := "http://localhost:8080/props" // api := "http://localhost:8080/props"
resp, err := httpClient.Get(api) // resp, err := httpClient.Get(api)
if err != nil { // if err != nil {
logger.Warn("failed to get model", "link", api, "error", err) // logger.Warn("failed to get model", "link", api, "error", err)
return // return
} // }
defer resp.Body.Close() // defer resp.Body.Close()
llmModel := models.LLMModels{} // llmModel := models.LLMModels{}
if err := json.NewDecoder(resp.Body).Decode(&llmModel); err != nil { // if err := json.NewDecoder(resp.Body).Decode(&llmModel); err != nil {
logger.Warn("failed to decode resp", "link", api, "error", err) // logger.Warn("failed to decode resp", "link", api, "error", err)
return // return
} // }
if resp.StatusCode != 200 { // if resp.StatusCode != 200 {
currentModel = "none" // currentModel = "none"
return // return
} // }
currentModel = path.Base(llmModel.Data[0].ID) // currentModel = path.Base(llmModel.Data[0].ID)
updateStatusLine() // updateStatusLine()
} // }
// func sendMsgToLLM(body io.Reader) (*models.LLMRespChunk, error) { // func sendMsgToLLM(body io.Reader) (*models.LLMRespChunk, error) {
func sendMsgToLLM(body io.Reader) { func sendMsgToLLM(body io.Reader) {
@@ -116,7 +121,6 @@ func sendMsgToLLM(body io.Reader) {
return return
} }
defer resp.Body.Close() defer resp.Body.Close()
// llmResp := []models.LLMRespChunk{}
reader := bufio.NewReader(resp.Body) reader := bufio.NewReader(resp.Body)
counter := uint32(0) counter := uint32(0)
for { for {
@@ -131,10 +135,13 @@ func sendMsgToLLM(body io.Reader) {
streamDone <- true streamDone <- true
break break
} }
llmchunk := models.LLMRespChunk{}
line, err := reader.ReadBytes('\n') line, err := reader.ReadBytes('\n')
if err != nil { if err != nil {
logger.Error("error reading response body", "error", err) logger.Error("error reading response body", "error", err, "line", string(line))
if err.Error() != "EOF" {
streamDone <- true
break
}
continue continue
} }
if len(line) <= 1 { if len(line) <= 1 {
@@ -142,24 +149,24 @@ func sendMsgToLLM(body io.Reader) {
} }
// starts with -> data: // starts with -> data:
line = line[6:] line = line[6:]
if err := json.Unmarshal(line, &llmchunk); err != nil { content, stop, err := chunkParser.ParseChunk(line)
logger.Error("failed to decode", "error", err, "line", string(line)) if err != nil {
logger.Error("error parsing response body", "error", err, "line", string(line), "url", cfg.APIURL)
streamDone <- true streamDone <- true
return
}
// llmResp = append(llmResp, llmchunk)
// logger.Info("streamview", "chunk", llmchunk)
// if llmchunk.Choices[len(llmchunk.Choices)-1].FinishReason != "chat.completion.chunk" {
if llmchunk.Choices[len(llmchunk.Choices)-1].FinishReason == "stop" {
if llmchunk.Choices[len(llmchunk.Choices)-1].Delta.Content != "" {
logger.Warn("text inside of finish llmchunk", "chunk", llmchunk, "counter", counter)
}
streamDone <- true
// last chunk
break break
} }
if stop {
if content != "" {
logger.Warn("text inside of finish llmchunk", "chunk", content, "counter", counter)
}
streamDone <- true
break
}
if counter == 0 {
content = strings.TrimPrefix(content, " ")
}
// bot sends way too many \n // bot sends way too many \n
answerText := strings.ReplaceAll(llmchunk.Choices[0].Delta.Content, "\n\n", "\n") answerText := strings.ReplaceAll(content, "\n\n", "\n")
chunkChan <- answerText chunkChan <- answerText
} }
} }
@@ -203,9 +210,10 @@ func chatRagUse(qText string) (string, error) {
func chatRound(userMsg, role string, tv *tview.TextView, regen bool) { func chatRound(userMsg, role string, tv *tview.TextView, regen bool) {
botRespMode = true botRespMode = true
reader := formMsg(chatBody, userMsg, role) // reader := formMsg(chatBody, userMsg, role)
if reader == nil { reader, err := chunkParser.FormMsg(userMsg, role)
logger.Error("empty reader from msgs", "role", role) if reader == nil || err != nil {
logger.Error("empty reader from msgs", "role", role, "error", err)
return return
} }
go sendMsgToLLM(reader) go sendMsgToLLM(reader)
@@ -238,8 +246,7 @@ out:
// bot msg is done; // bot msg is done;
// now check it for func call // now check it for func call
// logChat(activeChatName, chatBody.Messages) // logChat(activeChatName, chatBody.Messages)
err := updateStorageChat(activeChatName, chatBody.Messages) if err := updateStorageChat(activeChatName, chatBody.Messages); err != nil {
if err != nil {
logger.Warn("failed to update storage", "error", err, "name", activeChatName) logger.Warn("failed to update storage", "error", err, "name", activeChatName)
} }
findCall(respText.String(), tv) findCall(respText.String(), tv)
@@ -328,8 +335,8 @@ func charToStart(agentName string) bool {
func runModelNameTicker(n time.Duration) { func runModelNameTicker(n time.Duration) {
ticker := time.NewTicker(n) ticker := time.NewTicker(n)
for { for {
<-ticker.C
fetchModelName() fetchModelName()
<-ticker.C
} }
} }
@@ -339,7 +346,8 @@ func init() {
{Role: "system", Content: basicSysMsg}, {Role: "system", Content: basicSysMsg},
{Role: cfg.AssistantRole, Content: defaultFirstMsg}, {Role: cfg.AssistantRole, Content: defaultFirstMsg},
} }
logfile, err := os.OpenFile(cfg.LogFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) logfile, err := os.OpenFile(cfg.LogFile,
os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil { if err != nil {
logger.Error("failed to open log file", "error", err, "filename", cfg.LogFile) logger.Error("failed to open log file", "error", err, "filename", cfg.LogFile)
return return
@@ -372,6 +380,7 @@ func init() {
Stream: true, Stream: true,
Messages: lastChat, Messages: lastChat,
} }
go runModelNameTicker(time.Second * 20) initChunkParser()
go runModelNameTicker(time.Second * 120)
// tempLoad() // tempLoad()
} }

View File

@@ -57,28 +57,33 @@ type RoleMsg struct {
} }
func (m RoleMsg) ToText(i int, cfg *config.Config) string { func (m RoleMsg) ToText(i int, cfg *config.Config) string {
icon := "" icon := fmt.Sprintf("(%d)", i)
switch m.Role { if !strings.HasPrefix(m.Content, cfg.UserRole+":") && !strings.HasPrefix(m.Content, cfg.AssistantRole+":") {
case "assistant": switch m.Role {
icon = fmt.Sprintf("(%d) %s", i, cfg.AssistantIcon) case "assistant":
case "user": icon = fmt.Sprintf("(%d) %s", i, cfg.AssistantIcon)
icon = fmt.Sprintf("(%d) %s", i, cfg.UserIcon) case "user":
case "system": icon = fmt.Sprintf("(%d) %s", i, cfg.UserIcon)
icon = fmt.Sprintf("(%d) <system>: ", i) case "system":
case "tool": icon = fmt.Sprintf("(%d) <system>: ", i)
icon = fmt.Sprintf("(%d) %s", i, cfg.ToolIcon) case "tool":
default: icon = fmt.Sprintf("(%d) %s", i, cfg.ToolIcon)
icon = fmt.Sprintf("(%d) <%s>: ", i, m.Role) default:
icon = fmt.Sprintf("(%d) <%s>: ", i, m.Role)
}
} }
textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, m.Content) textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, m.Content)
return strings.ReplaceAll(textMsg, "\n\n", "\n") return strings.ReplaceAll(textMsg, "\n\n", "\n")
} }
// ToPrompt renders the message as a raw completion-prompt line in the
// form "role:\ncontent", collapsing double newlines to single ones
// (used for llama.cpp's /completion endpoint instead of chat messages).
func (m RoleMsg) ToPrompt() string {
	prompt := m.Role + ":\n" + m.Content
	return strings.ReplaceAll(prompt, "\n\n", "\n")
}
type ChatBody struct { type ChatBody struct {
Model string `json:"model"` Model string `json:"model"`
Stream bool `json:"stream"` Stream bool `json:"stream"`
Messages []RoleMsg `json:"messages"` Messages []RoleMsg `json:"messages"`
DRYMultiplier float32 `json:"frequency_penalty"`
} }
type ChatToolsBody struct { type ChatToolsBody struct {
@@ -144,3 +149,45 @@ type LLMModels struct {
} `json:"meta"` } `json:"meta"`
} `json:"data"` } `json:"data"`
} }
// LlamaCPPReq is the request body for llama.cpp's native /completion
// endpoint (as opposed to the OpenAI-compatible chat endpoint). Only the
// fields actually sent today are active; the commented-out fields are the
// remaining sampler knobs llama.cpp accepts, kept here for reference.
type LlamaCPPReq struct {
	// Stream requests server-sent-event chunks instead of one response.
	Stream bool `json:"stream"`
	// Messages []RoleMsg `json:"messages"`
	// Prompt is the raw, already-templated prompt text (roles are baked
	// into the string by the caller, e.g. via RoleMsg.ToPrompt).
	Prompt string `json:"prompt"`
	Temperature float32 `json:"temperature"`
	DryMultiplier float32 `json:"dry_multiplier"`
	// Stop lists strings that terminate generation when the model emits
	// them (e.g. "user:\n" so the bot does not speak for the user).
	Stop []string `json:"stop"`
	// MaxTokens int `json:"max_tokens"`
	// DryBase float64 `json:"dry_base"`
	// DryAllowedLength int `json:"dry_allowed_length"`
	// DryPenaltyLastN int `json:"dry_penalty_last_n"`
	// CachePrompt bool `json:"cache_prompt"`
	// DynatempRange int `json:"dynatemp_range"`
	// DynatempExponent int `json:"dynatemp_exponent"`
	// TopK int `json:"top_k"`
	// TopP float32 `json:"top_p"`
	// MinP float32 `json:"min_p"`
	// TypicalP int `json:"typical_p"`
	// XtcProbability int `json:"xtc_probability"`
	// XtcThreshold float32 `json:"xtc_threshold"`
	// RepeatLastN int `json:"repeat_last_n"`
	// RepeatPenalty int `json:"repeat_penalty"`
	// PresencePenalty int `json:"presence_penalty"`
	// FrequencyPenalty int `json:"frequency_penalty"`
	// Samplers string `json:"samplers"`
}
// NewLCPReq builds a streaming llama.cpp /completion request for the
// given prompt. The stop list cuts generation as soon as the model tries
// to speak for the given role (plus the chatml end-of-turn token).
// NOTE(review): DryMultiplier is hard-coded to 0.5 here while bot.go's
// defaultLCPProps uses 0.6 — confirm which value is intended, and whether
// these should come from the props map edited in the TUI.
func NewLCPReq(prompt, role string) LlamaCPPReq {
	stopWords := []string{role + ":\n", "<|im_end|>"}
	req := LlamaCPPReq{
		Stream:        true,
		Prompt:        prompt,
		Temperature:   0.8,
		DryMultiplier: 0.5,
		Stop:          stopWords,
	}
	return req
}
// LlamaCPPResp is one streamed chunk from llama.cpp's /completion
// endpoint: a piece of generated text plus a flag set on the final chunk.
type LlamaCPPResp struct {
	Content string `json:"content"`
	// Stop is true on the last chunk of the stream.
	Stop bool `json:"stop"`
}

36
tui.go
View File

@@ -37,6 +37,7 @@ var (
renamePage = "renamePage" renamePage = "renamePage"
RAGPage = "RAGPage " RAGPage = "RAGPage "
longStatusPage = "longStatusPage" longStatusPage = "longStatusPage"
propsPage = "propsPage"
// help text // help text
helpText = ` helpText = `
[yellow]Esc[white]: send msg [yellow]Esc[white]: send msg
@@ -129,6 +130,36 @@ func startNewChat() {
colorText() colorText()
} }
// makePropsForm builds a modal form for editing llama.cpp /completion
// sampling properties (temperature, dry_multiplier, ...). "Save" parses
// every field as float32 and writes it back into the props map in place
// (unparseable fields are logged and skipped); both buttons close the
// page. NOTE: map iteration order is random, so field order may differ
// between openings.
func makePropsForm(props map[string]float32) *tview.Form {
	form := tview.NewForm().
		AddTextView("Notes", "Props for llamacpp completion call", 40, 2, true, false).
		AddButton("Quit", func() {
			pages.RemovePage(propsPage)
		})
	form.AddButton("Save", func() {
		defer pages.RemovePage(propsPage)
		for pn := range props {
			propField, ok := form.GetFormItemByLabel(pn).(*tview.InputField)
			if !ok {
				logger.Warn("failed to convert to inputfield", "prop_name", pn)
				continue
			}
			// ParseFloat with bitSize 32 so the value round-trips into float32.
			val, err := strconv.ParseFloat(propField.GetText(), 32)
			if err != nil {
				logger.Warn("failed parse to float", "value", propField.GetText())
				continue
			}
			props[pn] = float32(val)
		}
	})
	for propName, value := range props {
		form.AddInputField(propName, fmt.Sprintf("%v", value), 20, tview.InputFieldFloat, nil)
	}
	form.SetBorder(true).SetTitle("Llamacpp completion props").SetTitleAlign(tview.AlignLeft)
	return form
}
func init() { func init() {
theme := tview.Theme{ theme := tview.Theme{
PrimitiveBackgroundColor: tcell.ColorDefault, PrimitiveBackgroundColor: tcell.ColorDefault,
@@ -420,8 +451,9 @@ func init() {
} }
return nil return nil
} }
if event.Key() == tcell.KeyCtrlA { if event.Key() == tcell.KeyCtrlP {
textArea.SetText("pressed ctrl+a", true) propsForm := makePropsForm(defaultLCPProps)
pages.AddPage(propsPage, propsForm, true, true)
return nil return nil
} }
if event.Key() == tcell.KeyCtrlN { if event.Key() == tcell.KeyCtrlN {