Feat: llamacpp /completion attempt
This commit is contained in:
@@ -40,6 +40,7 @@
|
||||
- consider using the /completion endpoint of llamacpp, since the openai endpoint clearly has template/format issues;
|
||||
- change temp, min-p and other params from tui;
|
||||
- DRY;
|
||||
- keybind to switch between openai and llamacpp endpoints;
|
||||
|
||||
### FIX:
|
||||
- bot responding (or hanging) blocks everything; +
|
||||
@@ -63,3 +64,4 @@
|
||||
- number of sentences in a batch should depend on number of words there. +
|
||||
- F1 can load any chat; loading a chat of another agent does not switch agents, so if that chat is continued, it will overwrite the agent in the db (either allow only chats from the current agent OR switch agent on chat loading); +
|
||||
- after chat is deleted: load undeleted chat; +
|
||||
- name split for llamacpp completion. user msg should end with 'bot_name:';
|
||||
|
||||
145
bot.go
145
bot.go
@@ -2,7 +2,6 @@ package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"elefant/config"
|
||||
"elefant/models"
|
||||
"elefant/rag"
|
||||
@@ -37,32 +36,38 @@ var (
|
||||
interruptResp = false
|
||||
ragger *rag.RAG
|
||||
currentModel = "none"
|
||||
chunkParser ChunkParser
|
||||
defaultLCPProps = map[string]float32{
|
||||
"temperature": 0.8,
|
||||
"dry_multiplier": 0.6,
|
||||
}
|
||||
)
|
||||
|
||||
// ====
|
||||
|
||||
func formMsg(chatBody *models.ChatBody, newMsg, role string) io.Reader {
|
||||
if newMsg != "" { // otherwise let the bot continue
|
||||
newMsg := models.RoleMsg{Role: role, Content: newMsg}
|
||||
chatBody.Messages = append(chatBody.Messages, newMsg)
|
||||
// if rag
|
||||
if cfg.RAGEnabled {
|
||||
ragResp, err := chatRagUse(newMsg.Content)
|
||||
if err != nil {
|
||||
logger.Error("failed to form a rag msg", "error", err)
|
||||
return nil
|
||||
}
|
||||
ragMsg := models.RoleMsg{Role: cfg.ToolRole, Content: ragResp}
|
||||
chatBody.Messages = append(chatBody.Messages, ragMsg)
|
||||
}
|
||||
}
|
||||
data, err := json.Marshal(chatBody)
|
||||
if err != nil {
|
||||
logger.Error("failed to form a msg", "error", err)
|
||||
return nil
|
||||
}
|
||||
return bytes.NewReader(data)
|
||||
}
|
||||
// DEPRECATED
|
||||
// func formMsg(chatBody *models.ChatBody, newMsg, role string) io.Reader {
|
||||
// if newMsg != "" { // otherwise let the bot continue
|
||||
// newMsg := models.RoleMsg{Role: role, Content: newMsg}
|
||||
// chatBody.Messages = append(chatBody.Messages, newMsg)
|
||||
// // if rag
|
||||
// if cfg.RAGEnabled {
|
||||
// ragResp, err := chatRagUse(newMsg.Content)
|
||||
// if err != nil {
|
||||
// logger.Error("failed to form a rag msg", "error", err)
|
||||
// return nil
|
||||
// }
|
||||
// ragMsg := models.RoleMsg{Role: cfg.ToolRole, Content: ragResp}
|
||||
// chatBody.Messages = append(chatBody.Messages, ragMsg)
|
||||
// }
|
||||
// }
|
||||
// data, err := json.Marshal(chatBody)
|
||||
// if err != nil {
|
||||
// logger.Error("failed to form a msg", "error", err)
|
||||
// return nil
|
||||
// }
|
||||
// return bytes.NewReader(data)
|
||||
// }
|
||||
|
||||
func fetchModelName() {
|
||||
api := "http://localhost:8080/v1/models"
|
||||
@@ -85,26 +90,26 @@ func fetchModelName() {
|
||||
updateStatusLine()
|
||||
}
|
||||
|
||||
func fetchProps() {
|
||||
api := "http://localhost:8080/props"
|
||||
resp, err := httpClient.Get(api)
|
||||
if err != nil {
|
||||
logger.Warn("failed to get model", "link", api, "error", err)
|
||||
return
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
llmModel := models.LLMModels{}
|
||||
if err := json.NewDecoder(resp.Body).Decode(&llmModel); err != nil {
|
||||
logger.Warn("failed to decode resp", "link", api, "error", err)
|
||||
return
|
||||
}
|
||||
if resp.StatusCode != 200 {
|
||||
currentModel = "none"
|
||||
return
|
||||
}
|
||||
currentModel = path.Base(llmModel.Data[0].ID)
|
||||
updateStatusLine()
|
||||
}
|
||||
// func fetchProps() {
|
||||
// api := "http://localhost:8080/props"
|
||||
// resp, err := httpClient.Get(api)
|
||||
// if err != nil {
|
||||
// logger.Warn("failed to get model", "link", api, "error", err)
|
||||
// return
|
||||
// }
|
||||
// defer resp.Body.Close()
|
||||
// llmModel := models.LLMModels{}
|
||||
// if err := json.NewDecoder(resp.Body).Decode(&llmModel); err != nil {
|
||||
// logger.Warn("failed to decode resp", "link", api, "error", err)
|
||||
// return
|
||||
// }
|
||||
// if resp.StatusCode != 200 {
|
||||
// currentModel = "none"
|
||||
// return
|
||||
// }
|
||||
// currentModel = path.Base(llmModel.Data[0].ID)
|
||||
// updateStatusLine()
|
||||
// }
|
||||
|
||||
// func sendMsgToLLM(body io.Reader) (*models.LLMRespChunk, error) {
|
||||
func sendMsgToLLM(body io.Reader) {
|
||||
@@ -116,7 +121,6 @@ func sendMsgToLLM(body io.Reader) {
|
||||
return
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
// llmResp := []models.LLMRespChunk{}
|
||||
reader := bufio.NewReader(resp.Body)
|
||||
counter := uint32(0)
|
||||
for {
|
||||
@@ -131,10 +135,13 @@ func sendMsgToLLM(body io.Reader) {
|
||||
streamDone <- true
|
||||
break
|
||||
}
|
||||
llmchunk := models.LLMRespChunk{}
|
||||
line, err := reader.ReadBytes('\n')
|
||||
if err != nil {
|
||||
logger.Error("error reading response body", "error", err)
|
||||
logger.Error("error reading response body", "error", err, "line", string(line))
|
||||
if err.Error() != "EOF" {
|
||||
streamDone <- true
|
||||
break
|
||||
}
|
||||
continue
|
||||
}
|
||||
if len(line) <= 1 {
|
||||
@@ -142,24 +149,24 @@ func sendMsgToLLM(body io.Reader) {
|
||||
}
|
||||
// starts with -> data:
|
||||
line = line[6:]
|
||||
if err := json.Unmarshal(line, &llmchunk); err != nil {
|
||||
logger.Error("failed to decode", "error", err, "line", string(line))
|
||||
content, stop, err := chunkParser.ParseChunk(line)
|
||||
if err != nil {
|
||||
logger.Error("error parsing response body", "error", err, "line", string(line), "url", cfg.APIURL)
|
||||
streamDone <- true
|
||||
return
|
||||
}
|
||||
// llmResp = append(llmResp, llmchunk)
|
||||
// logger.Info("streamview", "chunk", llmchunk)
|
||||
// if llmchunk.Choices[len(llmchunk.Choices)-1].FinishReason != "chat.completion.chunk" {
|
||||
if llmchunk.Choices[len(llmchunk.Choices)-1].FinishReason == "stop" {
|
||||
if llmchunk.Choices[len(llmchunk.Choices)-1].Delta.Content != "" {
|
||||
logger.Warn("text inside of finish llmchunk", "chunk", llmchunk, "counter", counter)
|
||||
}
|
||||
streamDone <- true
|
||||
// last chunk
|
||||
break
|
||||
}
|
||||
if stop {
|
||||
if content != "" {
|
||||
logger.Warn("text inside of finish llmchunk", "chunk", content, "counter", counter)
|
||||
}
|
||||
streamDone <- true
|
||||
break
|
||||
}
|
||||
if counter == 0 {
|
||||
content = strings.TrimPrefix(content, " ")
|
||||
}
|
||||
// bot sends way too many \n
|
||||
answerText := strings.ReplaceAll(llmchunk.Choices[0].Delta.Content, "\n\n", "\n")
|
||||
answerText := strings.ReplaceAll(content, "\n\n", "\n")
|
||||
chunkChan <- answerText
|
||||
}
|
||||
}
|
||||
@@ -203,9 +210,10 @@ func chatRagUse(qText string) (string, error) {
|
||||
|
||||
func chatRound(userMsg, role string, tv *tview.TextView, regen bool) {
|
||||
botRespMode = true
|
||||
reader := formMsg(chatBody, userMsg, role)
|
||||
if reader == nil {
|
||||
logger.Error("empty reader from msgs", "role", role)
|
||||
// reader := formMsg(chatBody, userMsg, role)
|
||||
reader, err := chunkParser.FormMsg(userMsg, role)
|
||||
if reader == nil || err != nil {
|
||||
logger.Error("empty reader from msgs", "role", role, "error", err)
|
||||
return
|
||||
}
|
||||
go sendMsgToLLM(reader)
|
||||
@@ -238,8 +246,7 @@ out:
|
||||
// bot msg is done;
|
||||
// now check it for func call
|
||||
// logChat(activeChatName, chatBody.Messages)
|
||||
err := updateStorageChat(activeChatName, chatBody.Messages)
|
||||
if err != nil {
|
||||
if err := updateStorageChat(activeChatName, chatBody.Messages); err != nil {
|
||||
logger.Warn("failed to update storage", "error", err, "name", activeChatName)
|
||||
}
|
||||
findCall(respText.String(), tv)
|
||||
@@ -328,8 +335,8 @@ func charToStart(agentName string) bool {
|
||||
func runModelNameTicker(n time.Duration) {
|
||||
ticker := time.NewTicker(n)
|
||||
for {
|
||||
<-ticker.C
|
||||
fetchModelName()
|
||||
<-ticker.C
|
||||
}
|
||||
}
|
||||
|
||||
@@ -339,7 +346,8 @@ func init() {
|
||||
{Role: "system", Content: basicSysMsg},
|
||||
{Role: cfg.AssistantRole, Content: defaultFirstMsg},
|
||||
}
|
||||
logfile, err := os.OpenFile(cfg.LogFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||
logfile, err := os.OpenFile(cfg.LogFile,
|
||||
os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||
if err != nil {
|
||||
logger.Error("failed to open log file", "error", err, "filename", cfg.LogFile)
|
||||
return
|
||||
@@ -372,6 +380,7 @@ func init() {
|
||||
Stream: true,
|
||||
Messages: lastChat,
|
||||
}
|
||||
go runModelNameTicker(time.Second * 20)
|
||||
initChunkParser()
|
||||
go runModelNameTicker(time.Second * 120)
|
||||
// tempLoad()
|
||||
}
|
||||
|
||||
@@ -57,28 +57,33 @@ type RoleMsg struct {
|
||||
}
|
||||
|
||||
func (m RoleMsg) ToText(i int, cfg *config.Config) string {
|
||||
icon := ""
|
||||
switch m.Role {
|
||||
case "assistant":
|
||||
icon = fmt.Sprintf("(%d) %s", i, cfg.AssistantIcon)
|
||||
case "user":
|
||||
icon = fmt.Sprintf("(%d) %s", i, cfg.UserIcon)
|
||||
case "system":
|
||||
icon = fmt.Sprintf("(%d) <system>: ", i)
|
||||
case "tool":
|
||||
icon = fmt.Sprintf("(%d) %s", i, cfg.ToolIcon)
|
||||
default:
|
||||
icon = fmt.Sprintf("(%d) <%s>: ", i, m.Role)
|
||||
icon := fmt.Sprintf("(%d)", i)
|
||||
if !strings.HasPrefix(m.Content, cfg.UserRole+":") && !strings.HasPrefix(m.Content, cfg.AssistantRole+":") {
|
||||
switch m.Role {
|
||||
case "assistant":
|
||||
icon = fmt.Sprintf("(%d) %s", i, cfg.AssistantIcon)
|
||||
case "user":
|
||||
icon = fmt.Sprintf("(%d) %s", i, cfg.UserIcon)
|
||||
case "system":
|
||||
icon = fmt.Sprintf("(%d) <system>: ", i)
|
||||
case "tool":
|
||||
icon = fmt.Sprintf("(%d) %s", i, cfg.ToolIcon)
|
||||
default:
|
||||
icon = fmt.Sprintf("(%d) <%s>: ", i, m.Role)
|
||||
}
|
||||
}
|
||||
textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, m.Content)
|
||||
return strings.ReplaceAll(textMsg, "\n\n", "\n")
|
||||
}
|
||||
|
||||
func (m RoleMsg) ToPrompt() string {
|
||||
return strings.ReplaceAll(fmt.Sprintf("%s:\n%s", m.Role, m.Content), "\n\n", "\n")
|
||||
}
|
||||
|
||||
type ChatBody struct {
|
||||
Model string `json:"model"`
|
||||
Stream bool `json:"stream"`
|
||||
Messages []RoleMsg `json:"messages"`
|
||||
DRYMultiplier float32 `json:"frequency_penalty"`
|
||||
Model string `json:"model"`
|
||||
Stream bool `json:"stream"`
|
||||
Messages []RoleMsg `json:"messages"`
|
||||
}
|
||||
|
||||
type ChatToolsBody struct {
|
||||
@@ -144,3 +149,45 @@ type LLMModels struct {
|
||||
} `json:"meta"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
// LlamaCPPReq is the JSON request body carrying a raw prompt, sampling
// parameters and stop strings (presumably for the llamacpp /completion
// endpoint — confirm against the caller).
type LlamaCPPReq struct {
	Stream bool `json:"stream"`
	// Messages      []RoleMsg `json:"messages"`
	Prompt        string   `json:"prompt"`
	Temperature   float32  `json:"temperature"`
	DryMultiplier float32  `json:"dry_multiplier"`
	Stop          []string `json:"stop"`
	// MaxTokens        int     `json:"max_tokens"`
	// DryBase          float64 `json:"dry_base"`
	// DryAllowedLength int     `json:"dry_allowed_length"`
	// DryPenaltyLastN  int     `json:"dry_penalty_last_n"`
	// CachePrompt      bool    `json:"cache_prompt"`
	// DynatempRange    int     `json:"dynatemp_range"`
	// DynatempExponent int     `json:"dynatemp_exponent"`
	// TopK             int     `json:"top_k"`
	// TopP             float32 `json:"top_p"`
	// MinP             float32 `json:"min_p"`
	// TypicalP         int     `json:"typical_p"`
	// XtcProbability   int     `json:"xtc_probability"`
	// XtcThreshold     float32 `json:"xtc_threshold"`
	// RepeatLastN      int     `json:"repeat_last_n"`
	// RepeatPenalty    int     `json:"repeat_penalty"`
	// PresencePenalty  int     `json:"presence_penalty"`
	// FrequencyPenalty int     `json:"frequency_penalty"`
	// Samplers         string  `json:"samplers"`
}

// NewLCPReq builds a streaming request for the given prompt.
//
// role is the speaker name whose "<role>:\n" marker should stop generation;
// "<|im_end|>" is always a stop string. An empty role adds no role marker,
// because a bare ":\n" would cut the output at any colon followed by a
// newline.
//
// props are optional overrides keyed by JSON parameter name ("temperature",
// "dry_multiplier"); later maps win over earlier ones and unknown keys are
// ignored. Without overrides the defaults are temperature 0.8 and
// dry_multiplier 0.5.
func NewLCPReq(prompt, role string, props ...map[string]float32) LlamaCPPReq {
	req := LlamaCPPReq{
		Stream:        true,
		Prompt:        prompt,
		Temperature:   0.8,
		DryMultiplier: 0.5,
		Stop:          []string{"<|im_end|>"},
	}
	if role != "" {
		req.Stop = []string{role + ":\n", "<|im_end|>"}
	}
	for _, p := range props {
		if v, ok := p["temperature"]; ok {
			req.Temperature = v
		}
		if v, ok := p["dry_multiplier"]; ok {
			req.DryMultiplier = v
		}
	}
	return req
}
|
||||
|
||||
// LlamaCPPResp holds the "content" and "stop" fields decoded from a response
// (presumably one streamed chunk of a llamacpp completion — confirm against
// the parser that uses it).
type LlamaCPPResp struct {
	Content string `json:"content"`
	Stop    bool   `json:"stop"`
}
|
||||
|
||||
36
tui.go
36
tui.go
@@ -37,6 +37,7 @@ var (
|
||||
renamePage = "renamePage"
|
||||
RAGPage = "RAGPage "
|
||||
longStatusPage = "longStatusPage"
|
||||
propsPage = "propsPage"
|
||||
// help text
|
||||
helpText = `
|
||||
[yellow]Esc[white]: send msg
|
||||
@@ -129,6 +130,36 @@ func startNewChat() {
|
||||
colorText()
|
||||
}
|
||||
|
||||
func makePropsForm(props map[string]float32) *tview.Form {
|
||||
form := tview.NewForm().
|
||||
AddTextView("Notes", "Props for llamacpp completion call", 40, 2, true, false).
|
||||
AddCheckbox("Age 18+", false, nil).
|
||||
AddButton("Quit", func() {
|
||||
pages.RemovePage(propsPage)
|
||||
})
|
||||
form.AddButton("Save", func() {
|
||||
defer pages.RemovePage(propsPage)
|
||||
for pn := range props {
|
||||
propField, ok := form.GetFormItemByLabel(pn).(*tview.InputField)
|
||||
if !ok {
|
||||
logger.Warn("failed to convert to inputfield", "prop_name", pn)
|
||||
continue
|
||||
}
|
||||
val, err := strconv.ParseFloat(propField.GetText(), 32)
|
||||
if err != nil {
|
||||
logger.Warn("failed parse to float", "value", propField.GetText())
|
||||
continue
|
||||
}
|
||||
props[pn] = float32(val)
|
||||
}
|
||||
})
|
||||
for propName, value := range props {
|
||||
form.AddInputField(propName, fmt.Sprintf("%v", value), 20, tview.InputFieldFloat, nil)
|
||||
}
|
||||
form.SetBorder(true).SetTitle("Enter some data").SetTitleAlign(tview.AlignLeft)
|
||||
return form
|
||||
}
|
||||
|
||||
func init() {
|
||||
theme := tview.Theme{
|
||||
PrimitiveBackgroundColor: tcell.ColorDefault,
|
||||
@@ -420,8 +451,9 @@ func init() {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if event.Key() == tcell.KeyCtrlA {
|
||||
textArea.SetText("pressed ctrl+a", true)
|
||||
if event.Key() == tcell.KeyCtrlP {
|
||||
propsForm := makePropsForm(defaultLCPProps)
|
||||
pages.AddPage(propsPage, propsForm, true, true)
|
||||
return nil
|
||||
}
|
||||
if event.Key() == tcell.KeyCtrlN {
|
||||
|
||||
Reference in New Issue
Block a user