Feat: switch between completion and chat api

Grail Finder
2025-02-01 16:32:36 +03:00
parent 336451340b
commit 84c94ecea3
8 changed files with 81 additions and 24 deletions


@@ -27,22 +27,21 @@
 - RAG support|implementation; +
 - delete chat option; +
 - RAG file loading status/progress; +
-- fullscreen textarea option (bothersome to implement);
-- separate messages that are stored and chat and send to the bot, i.e. option to omit tool calls (there might be a point where they are no longer needed in ctx);
-- char card is the sys message, but how about giving tools to char that does not have it?
-- it is a bit clumsy to mix chats in db and chars from the external files, maybe load external files in db on startup?
-- lets say we have two (or more) agents with the same name across multiple chats. These agents go and ask db for topics they memorised. Now they can access topics that aren't meant for them. (so memory should have an option: shareable; that indicates if that memory can be shared across chats);
-- server mode: no tui but api calls with the func calling, rag, other middleware;
-- boolean flag to use/not use tools. I see it as a msg from a tool to an llm "Hey, it might be good idea to use me!";
-- connection to a model status;
-===== /llamacpp specific (it has a different body -> interface instead of global var)
+- in chat management table add preview of the last message; +
+===== /llamacpp specific (it has a different body -> interface instead of global var)
 - edit syscards; +
 - consider adding use /completion of llamacpp, since openai endpoint clearly has template|format issues; +
 - change temp, min-p and other params from tui; +
 - DRY; +
-- keybind to switch between openai and llamacpp endpoints (chat vs completion);
+- keybind to switch between openai and llamacpp endpoints (chat vs completion); +
+=======
+- char card is the sys message, but how about giving tools to char that does not have it?
+- lets say we have two (or more) agents with the same name across multiple chats. These agents go and ask db for topics they memorised. Now they can access topics that aren't meant for them. (so memory should have an option: shareable; that indicates if that memory can be shared across chats);
+- server mode: no tui but api calls with the func calling, rag, other middleware;
+- boolean flag to use/not use tools. I see it as a msg from a tool to an llm "Hey, it might be good idea to use me!";
+- connection to a model status; (need to be tied to some event, perhaps its own shortcut even)
+- separate messages that are stored and chat and send to the bot, i.e. option to omit tool calls and thinking (there might be a point where they are no longer needed in ctx);
 - option to remove <thinking> from chat history;
-- in chat management table add preview of the last message; +
 
 ### FIX:
 - bot responding (or hanging) blocks everything; +
@@ -61,7 +60,6 @@
 - all page names should be vars; +
 - normal case regen omits assistant icon; +
 - user icon (and role?) from config is not used; +
-- message editing broke ( runtime error: index out of range [-1]); +
 - RAG: encode multiple sentences (~5-10) to embeddings a piece. +
 - number of sentences in a batch should depend on number of words there. +
 - F1 can load any chat, by loading chat of other agent it does not switch agents, if that chat is continued, it will rewrite agent in db; (either allow only chats from current agent OR switch agent on chat loading); +
@@ -69,3 +67,5 @@
 - name split for llamacpp completion. user msg should end with 'bot_name:'; +
 - add retry on failed call (and EOF);
 - model info shold be an event and show disconnect status when fails;
+- message editing broke ( runtime error: index out of range [-1]); out of index
+- remove icons for agents/user; use only <role>:

bot.go (4 changed lines)

@@ -91,7 +91,7 @@ func fetchModelName() {
 // func sendMsgToLLM(body io.Reader) (*models.LLMRespChunk, error) {
 func sendMsgToLLM(body io.Reader) {
 	// nolint
-	resp, err := httpClient.Post(cfg.APIURL, "application/json", body)
+	resp, err := httpClient.Post(cfg.CurrentAPI, "application/json", body)
 	if err != nil {
 		logger.Error("llamacpp api", "error", err)
 		streamDone <- true
@@ -128,7 +128,7 @@ func sendMsgToLLM(body io.Reader) {
 			line = line[6:]
 			content, stop, err := chunkParser.ParseChunk(line)
 			if err != nil {
-				logger.Error("error parsing response body", "error", err, "line", string(line), "url", cfg.APIURL)
+				logger.Error("error parsing response body", "error", err, "line", string(line), "url", cfg.CurrentAPI)
 				streamDone <- true
 				break
 			}


@@ -1,4 +1,5 @@
-APIURL = "http://localhost:8080/v1/chat/completions"
+ChatAPI = "http://localhost:8080/v1/chat/completions"
+CompletionAPI = "http://localhost:8080/completion"
 EmbedURL = "http://localhost:8080/v1/embeddings"
 ShowSys = true
 LogFile = "log.txt"
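The two URLs configured above speak different request dialects: llama.cpp's native /completion expects a single pre-rendered prompt string, while the OpenAI-compatible /v1/chat/completions expects role-tagged messages. A minimal sketch of the two body shapes, assuming standard llama.cpp and OpenAI fields; the struct names are illustrative, not from this repo:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// CompletionReq targets llama.cpp's native /completion endpoint:
// one pre-rendered prompt string.
type CompletionReq struct {
	Prompt string `json:"prompt"`
	Stream bool   `json:"stream"`
}

// ChatMsg and ChatReq target the OpenAI-style /v1/chat/completions
// endpoint: role-tagged messages instead of a flat prompt.
type ChatMsg struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type ChatReq struct {
	Model    string    `json:"model"`
	Stream   bool      `json:"stream"`
	Messages []ChatMsg `json:"messages"`
}

func main() {
	comp, _ := json.Marshal(CompletionReq{Prompt: "User: hi\nAssistant:", Stream: true})
	chat, _ := json.Marshal(ChatReq{
		Model:    "local",
		Stream:   true,
		Messages: []ChatMsg{{Role: "user", Content: "hi"}},
	})
	fmt.Println(string(comp))
	fmt.Println(string(chat))
}
```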


@@ -7,7 +7,11 @@ import (
 )
 
 type Config struct {
-	APIURL        string `toml:"APIURL"`
+	ChatAPI       string `toml:"ChatAPI"`
+	CompletionAPI string `toml:"CompletionAPI"`
+	CurrentAPI    string
+	APIMap        map[string]string
+	//
 	ShowSys  bool   `toml:"ShowSys"`
 	LogFile  string `toml:"LogFile"`
 	UserRole string `toml:"UserRole"`
@@ -34,7 +38,8 @@ func LoadConfigOrDefault(fn string) *Config {
 	_, err := toml.DecodeFile(fn, &config)
 	if err != nil {
 		fmt.Println("failed to read config from file, loading default")
-		config.APIURL = "http://localhost:8080/v1/chat/completions"
+		config.ChatAPI = "http://localhost:8080/v1/chat/completions"
+		config.CompletionAPI = "http://localhost:8080/completion"
 		config.RAGEnabled = false
 		config.EmbedURL = "http://localhost:8080/v1/embiddings"
 		config.ShowSys = true
@@ -48,6 +53,16 @@ func LoadConfigOrDefault(fn string) *Config {
 		config.SysDir = "sysprompts"
 		config.ChunkLimit = 8192
 	}
+	config.CurrentAPI = config.ChatAPI
+	config.APIMap = map[string]string{
+		config.ChatAPI: config.CompletionAPI,
+	}
+	if config.CompletionAPI != "" {
+		config.CurrentAPI = config.CompletionAPI
+		config.APIMap = map[string]string{
+			config.CompletionAPI: config.ChatAPI,
+		}
+	}
 	// if any value is empty fill with default
 	return config
 }
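LoadConfigOrDefault seeds APIMap with a single edge pointing from the active endpoint to its alternative; the Ctrl+V handler in tui.go further down writes the reverse edge back on every switch, so the map behaves as a two-state toggle. A standalone sketch of that round trip, assuming both endpoints are configured (the real handler tests newAPI == "" rather than the comma-ok form):

```go
package main

import "fmt"

func main() {
	chatAPI := "http://localhost:8080/v1/chat/completions"
	completionAPI := "http://localhost:8080/completion"

	// mirrors LoadConfigOrDefault: when CompletionAPI is set,
	// it becomes the current endpoint and the map points back to chat
	currentAPI := completionAPI
	apiMap := map[string]string{completionAPI: chatAPI}

	// mirrors the Ctrl+V handler: look up the alternative,
	// store the reverse edge, then switch
	for i := 0; i < 4; i++ {
		newAPI := apiMap[currentAPI]
		if newAPI == "" {
			break // only one endpoint configured, nothing to switch to
		}
		apiMap[newAPI] = currentAPI
		currentAPI = newAPI
		fmt.Println(currentAPI) // alternates between the two URLs
	}
}
```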

llm.go (2 changed lines)

@@ -15,7 +15,7 @@ type ChunkParser interface {
 
 func initChunkParser() {
 	chunkParser = LlamaCPPeer{}
-	if strings.Contains(cfg.APIURL, "v1") {
+	if strings.Contains(cfg.CurrentAPI, "v1") {
 		logger.Info("chosen openai parser")
 		chunkParser = OpenAIer{}
 		return
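The same URL also drives parser selection: anything containing "v1" is treated as OpenAI-compatible, everything else as llama.cpp's native stream. Note that nothing in this diff re-runs initChunkParser after Ctrl+V changes cfg.CurrentAPI. A minimal sketch of the heuristic; the real function assigns the package-level chunkParser instead of returning a name:

```go
package main

import (
	"fmt"
	"strings"
)

// parserFor mirrors initChunkParser's heuristic: OpenAI-compatible
// endpoints are mounted under /v1/, llama.cpp's native one is not.
func parserFor(url string) string {
	if strings.Contains(url, "v1") {
		return "OpenAIer"
	}
	return "LlamaCPPeer"
}

func main() {
	fmt.Println(parserFor("http://localhost:8080/v1/chat/completions")) // OpenAIer
	fmt.Println(parserFor("http://localhost:8080/completion"))          // LlamaCPPeer
}
```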


@@ -10,7 +10,7 @@ var (
 	botRespMode   = false
 	editMode      = false
 	selectedIndex = int(-1)
-	indexLine     = "F12 to show keys help; bot resp mode: %v; char: %s; chat: %s; RAGEnabled: %v; toolUseAdviced: %v; model: %s"
+	indexLine     = "F12 to show keys help; bot resp mode: %v; char: %s; chat: %s; RAGEnabled: %v; toolUseAdviced: %v; model: %s\nAPI_URL: %s"
 	focusSwitcher = map[tview.Primitive]tview.Primitive{}
 )

server.go (new file, 27 lines)

@@ -0,0 +1,27 @@
+package main
+
+import (
+	"fmt"
+	"net/http"
+)
+
+// create server
+// listen to the completion endpoint handler
+func completion(w http.ResponseWriter, req *http.Request) {
+	// post request
+	body := req.Body
+	// get body as io.reader
+	// pass it to the /completion
+	go sendMsgToLLM(body)
+out:
+	for {
+		select {
+		case chunk := <-chunkChan:
+			fmt.Println(chunk)
+		case <-streamDone:
+			break out
+		}
+	}
+	return
+}
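server.go only defines the handler: nothing in this commit registers a route or starts a listener, and the received chunks go to stdout via fmt.Println rather than to the ResponseWriter, so this reads as scaffolding for the "server mode" TODO item. A hypothetical wiring, with the route, port, and stub handler all assumptions:

```go
package main

import (
	"fmt"
	"net/http"
)

// completionStub stands in for the completion handler above;
// a real version would stream chunks to w instead of stdout.
func completionStub(w http.ResponseWriter, req *http.Request) {
	fmt.Fprintln(w, "chunks would be streamed here")
}

// hypothetical wiring; neither the route nor the port
// appears anywhere in this commit
func main() {
	http.HandleFunc("/completion", completionStub)
	http.ListenAndServe(":8081", nil)
}
```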

tui.go (24 changed lines)

@@ -3,7 +3,6 @@ package main
 import (
 	"elefant/models"
 	"elefant/pngmeta"
-	"elefant/rag"
 	"fmt"
 	"os"
 	"strconv"
@@ -41,7 +40,7 @@ var (
 	// help text
 	helpText = `
 [yellow]Esc[white]: send msg
-[yellow]PgUp/Down[white]: switch focus
+[yellow]PgUp/Down[white]: switch focus between input and chat widgets
 [yellow]F1[white]: manage chats
 [yellow]F2[white]: regen last
 [yellow]F3[white]: delete last msg
@@ -50,13 +49,16 @@ var (
 [yellow]F6[white]: interrupt bot resp
 [yellow]F7[white]: copy last msg to clipboard (linux xclip)
 [yellow]F8[white]: copy n msg to clipboard (linux xclip)
-[yellow]F10[white]: manage loaded rag files
+[yellow]F10[white]: manage loaded rag files (that already in vector db)
 [yellow]F11[white]: switch RAGEnabled boolean
 [yellow]F12[white]: show this help page
 [yellow]Ctrl+s[white]: load new char/agent
 [yellow]Ctrl+e[white]: export chat to json file
 [yellow]Ctrl+n[white]: start a new chat
 [yellow]Ctrl+c[white]: close programm
+[yellow]Ctrl+p[white]: props edit form (min-p, dry, etc.)
+[yellow]Ctrl+v[white]: switch between /completion and /chat api (if provided in config)
+[yellow]Ctrl+r[white]: menu of files that can be loaded in vector db (RAG)
 Press Enter to go back
 `
@@ -87,7 +89,7 @@ func colorText() {
 }
 
 func updateStatusLine() {
-	position.SetText(fmt.Sprintf(indexLine, botRespMode, cfg.AssistantRole, activeChatName, cfg.RAGEnabled, cfg.ToolUse, currentModel))
+	position.SetText(fmt.Sprintf(indexLine, botRespMode, cfg.AssistantRole, activeChatName, cfg.RAGEnabled, cfg.ToolUse, currentModel, cfg.CurrentAPI))
 }
 
 func initSysCards() ([]string, error) {
@@ -473,6 +475,19 @@ func init() {
 			startNewChat()
 			return nil
 		}
+		if event.Key() == tcell.KeyCtrlV {
+			// switch between /chat and /completion api
+			prevAPI := cfg.CurrentAPI
+			newAPI := cfg.APIMap[cfg.CurrentAPI]
+			if newAPI == "" {
+				// do not switch
+				return nil
+			}
+			cfg.APIMap[newAPI] = prevAPI
+			cfg.CurrentAPI = newAPI
+			updateStatusLine()
+			return nil
+		}
 		if event.Key() == tcell.KeyCtrlS {
 			// switch sys prompt
 			labels, err := initSysCards()
@@ -505,7 +520,6 @@ func init() {
 			}
 			fileList = append(fileList, f.Name())
 		}
-		rag.LongJobStatusCh <- "first msg"
 		chatRAGTable := makeRAGTable(fileList)
 		pages.AddPage(RAGPage, chatRAGTable, true, true)
 		return nil