Feat (config): chunk limit
This commit is contained in:
@@ -18,11 +18,12 @@
|
||||
- sqlite for the bot memory; +
|
||||
- rename current chat; +
|
||||
- help page with all key bindings; +
|
||||
- change temp, min-p and other params from tui;
|
||||
- default config file (api url, path to sysprompts, path to log, limits, etc); +
|
||||
- fullscreen textarea option (bothersome to implement);
|
||||
- consider using the /completion endpoint of llamacpp, since the openai endpoint clearly has template/format issues;
|
||||
- change temp, min-p and other params from tui;
|
||||
- default config file (api url, path to sysprompts, path to log, limits, etc);
|
||||
- export whole chat into a json file;
|
||||
- directory with sys prompts;
|
||||
|
||||
### FIX:
|
||||
- bot responding (or hanging) blocks everything; +
|
||||
@@ -36,3 +37,4 @@
|
||||
- delete last msg: can have unexpected behavior (deletes what appears to be two messages if last bot msg was not generated (should only delete icon in that case)) (should use regen instead of delete in that case);
|
||||
- let's say we have two (or more) agents with the same name across multiple chats. These agents go and ask the db for topics they memorized. Now they can access topics that aren't meant for them. (so memory should have an option: shareable; that indicates if that memory can be shared across chats);
|
||||
- if the option to show sys msg is enabled: it should display new tool responses;
|
||||
- when bot generation ends with an err: need a way to switch back to the bot_resp_false mode;
|
||||
|
||||
19
bot.go
19
bot.go
@@ -13,19 +13,15 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/rivo/tview"
|
||||
)
|
||||
|
||||
var httpClient = http.Client{
|
||||
Timeout: time.Second * 20,
|
||||
}
|
||||
var httpClient = http.Client{}
|
||||
|
||||
var (
|
||||
cfg *config.Config
|
||||
logger *slog.Logger
|
||||
chunkLimit = 1000
|
||||
activeChatName string
|
||||
chunkChan = make(chan string, 10)
|
||||
streamDone = make(chan bool, 1)
|
||||
@@ -63,25 +59,25 @@ func sendMsgToLLM(body io.Reader) {
|
||||
defer resp.Body.Close()
|
||||
// llmResp := []models.LLMRespChunk{}
|
||||
reader := bufio.NewReader(resp.Body)
|
||||
counter := 0
|
||||
counter := uint32(0)
|
||||
for {
|
||||
counter++
|
||||
if interruptResp {
|
||||
interruptResp = false
|
||||
logger.Info("interrupted bot response")
|
||||
break
|
||||
}
|
||||
llmchunk := models.LLMRespChunk{}
|
||||
if counter > chunkLimit {
|
||||
logger.Warn("response hit chunk limit", "limit", chunkLimit)
|
||||
if cfg.ChunkLimit > 0 && counter > cfg.ChunkLimit {
|
||||
logger.Warn("response hit chunk limit", "limit", cfg.ChunkLimit)
|
||||
streamDone <- true
|
||||
break
|
||||
}
|
||||
llmchunk := models.LLMRespChunk{}
|
||||
line, err := reader.ReadBytes('\n')
|
||||
if err != nil {
|
||||
streamDone <- true
|
||||
logger.Error("error reading response body", "error", err)
|
||||
continue
|
||||
}
|
||||
// logger.Info("linecheck", "line", string(line), "len", len(line), "counter", counter)
|
||||
if len(line) <= 1 {
|
||||
continue // skip \n
|
||||
}
|
||||
@@ -100,7 +96,6 @@ func sendMsgToLLM(body io.Reader) {
|
||||
// last chunk
|
||||
break
|
||||
}
|
||||
counter++
|
||||
// bot sends way too many \n
|
||||
answerText := strings.ReplaceAll(llmchunk.Choices[0].Delta.Content, "\n\n", "\n")
|
||||
chunkChan <- answerText
|
||||
|
||||
@@ -16,6 +16,7 @@ type Config struct {
|
||||
AssistantIcon string `toml:"AssistantIcon"`
|
||||
UserIcon string `toml:"UserIcon"`
|
||||
ToolIcon string `toml:"ToolIcon"`
|
||||
ChunkLimit uint32 `toml:"ChunkLimit"`
|
||||
}
|
||||
|
||||
func LoadConfigOrDefault(fn string) *Config {
|
||||
@@ -32,6 +33,7 @@ func LoadConfigOrDefault(fn string) *Config {
|
||||
config.UserRole = "user"
|
||||
config.ToolRole = "tool"
|
||||
config.AssistantRole = "assistant"
|
||||
config.ChunkLimit = 8192
|
||||
}
|
||||
return config
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user