Feat: show stats

This commit is contained in:
Grail Finder
2026-02-23 09:18:19 +03:00
parent 850ca103e5
commit 23c21f87bb
2 changed files with 62 additions and 24 deletions

51
bot.go
View File

@@ -46,6 +46,7 @@ var (
ragger *rag.RAG ragger *rag.RAG
chunkParser ChunkParser chunkParser ChunkParser
lastToolCall *models.FuncCall lastToolCall *models.FuncCall
lastRespStats *models.ResponseStats
//nolint:unused // TTS_ENABLED conditionally uses this //nolint:unused // TTS_ENABLED conditionally uses this
orator Orator orator Orator
asr STT asr STT
@@ -532,6 +533,19 @@ func extractDetailedErrorFromBytes(body []byte, statusCode int) string {
return fmt.Sprintf("HTTP Status: %d, Response Body: %s", statusCode, string(body)) return fmt.Sprintf("HTTP Status: %d, Response Body: %s", statusCode, string(body))
} }
func finalizeRespStats(tokenCount int, startTime time.Time) {
duration := time.Since(startTime).Seconds()
var tps float64
if duration > 0 {
tps = float64(tokenCount) / duration
}
lastRespStats = &models.ResponseStats{
Tokens: tokenCount,
Duration: duration,
TokensPerSec: tps,
}
}
// sendMsgToLLM expects streaming resp // sendMsgToLLM expects streaming resp
func sendMsgToLLM(body io.Reader) { func sendMsgToLLM(body io.Reader) {
choseChunkParser() choseChunkParser()
@@ -590,6 +604,8 @@ func sendMsgToLLM(body io.Reader) {
defer resp.Body.Close() defer resp.Body.Close()
reader := bufio.NewReader(resp.Body) reader := bufio.NewReader(resp.Body)
counter := uint32(0) counter := uint32(0)
tokenCount := 0
startTime := time.Now()
hasReasoning := false hasReasoning := false
reasoningSent := false reasoningSent := false
for { for {
@@ -601,6 +617,7 @@ func sendMsgToLLM(body io.Reader) {
// to stop from spiriling in infinity read of bad bytes that happens with poor connection // to stop from spiriling in infinity read of bad bytes that happens with poor connection
if cfg.ChunkLimit > 0 && counter > cfg.ChunkLimit { if cfg.ChunkLimit > 0 && counter > cfg.ChunkLimit {
logger.Warn("response hit chunk limit", "limit", cfg.ChunkLimit) logger.Warn("response hit chunk limit", "limit", cfg.ChunkLimit)
finalizeRespStats(tokenCount, startTime)
streamDone <- true streamDone <- true
break break
} }
@@ -624,6 +641,7 @@ func sendMsgToLLM(body io.Reader) {
logger.Error("failed to notify", "error", err) logger.Error("failed to notify", "error", err)
} }
} }
finalizeRespStats(tokenCount, startTime)
streamDone <- true streamDone <- true
break break
// } // }
@@ -639,6 +657,7 @@ func sendMsgToLLM(body io.Reader) {
line = line[6:] line = line[6:]
logger.Debug("debugging resp", "line", string(line)) logger.Debug("debugging resp", "line", string(line))
if bytes.Equal(line, []byte("[DONE]\n")) { if bytes.Equal(line, []byte("[DONE]\n")) {
finalizeRespStats(tokenCount, startTime)
streamDone <- true streamDone <- true
break break
} }
@@ -652,6 +671,7 @@ func sendMsgToLLM(body io.Reader) {
if err := notifyUser("LLM Response Error", "Failed to parse LLM response: "+err.Error()); err != nil { if err := notifyUser("LLM Response Error", "Failed to parse LLM response: "+err.Error()); err != nil {
logger.Error("failed to notify user", "error", err) logger.Error("failed to notify user", "error", err)
} }
finalizeRespStats(tokenCount, startTime)
streamDone <- true streamDone <- true
break break
} }
@@ -667,12 +687,15 @@ func sendMsgToLLM(body io.Reader) {
// Close the thinking block if we were streaming reasoning and haven't closed it yet // Close the thinking block if we were streaming reasoning and haven't closed it yet
if hasReasoning && !reasoningSent { if hasReasoning && !reasoningSent {
chunkChan <- "</think>" chunkChan <- "</think>"
tokenCount++
} }
if chunk.Chunk != "" { if chunk.Chunk != "" {
logger.Warn("text inside of finish llmchunk", "chunk", chunk, "counter", counter) logger.Warn("text inside of finish llmchunk", "chunk", chunk, "counter", counter)
answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n") answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
chunkChan <- answerText chunkChan <- answerText
tokenCount++
} }
finalizeRespStats(tokenCount, startTime)
streamDone <- true streamDone <- true
break break
} }
@@ -684,12 +707,14 @@ func sendMsgToLLM(body io.Reader) {
if !hasReasoning { if !hasReasoning {
// First reasoning chunk - send opening tag // First reasoning chunk - send opening tag
chunkChan <- "<think>" chunkChan <- "<think>"
tokenCount++
hasReasoning = true hasReasoning = true
} }
// Stream reasoning content immediately // Stream reasoning content immediately
answerText = strings.ReplaceAll(chunk.Reasoning, "\n\n", "\n") answerText = strings.ReplaceAll(chunk.Reasoning, "\n\n", "\n")
if answerText != "" { if answerText != "" {
chunkChan <- answerText chunkChan <- answerText
tokenCount++
} }
} }
@@ -697,6 +722,7 @@ func sendMsgToLLM(body io.Reader) {
if chunk.Chunk != "" && hasReasoning && !reasoningSent { if chunk.Chunk != "" && hasReasoning && !reasoningSent {
// Close the thinking block before sending actual content // Close the thinking block before sending actual content
chunkChan <- "</think>" chunkChan <- "</think>"
tokenCount++
reasoningSent = true reasoningSent = true
} }
@@ -708,10 +734,12 @@ func sendMsgToLLM(body io.Reader) {
if chunkParser.GetAPIType() == models.APITypeCompletion && if chunkParser.GetAPIType() == models.APITypeCompletion &&
slices.Contains(stopStrings, answerText) { slices.Contains(stopStrings, answerText) {
logger.Debug("stop string detected on client side for completion endpoint", "stop_string", answerText) logger.Debug("stop string detected on client side for completion endpoint", "stop_string", answerText)
finalizeRespStats(tokenCount, startTime)
streamDone <- true streamDone <- true
} }
if answerText != "" { if answerText != "" {
chunkChan <- answerText chunkChan <- answerText
tokenCount++
} }
openAIToolChan <- chunk.ToolChunk openAIToolChan <- chunk.ToolChunk
if chunk.FuncName != "" { if chunk.FuncName != "" {
@@ -723,6 +751,7 @@ func sendMsgToLLM(body io.Reader) {
if interruptResp { // read bytes, so it would not get into beginning of the next req if interruptResp { // read bytes, so it would not get into beginning of the next req
interruptResp = false interruptResp = false
logger.Info("interrupted bot response", "chunk_counter", counter) logger.Info("interrupted bot response", "chunk_counter", counter)
finalizeRespStats(tokenCount, startTime)
streamDone <- true streamDone <- true
break break
} }
@@ -914,7 +943,6 @@ out:
textView.ScrollToEnd() textView.ScrollToEnd()
} }
case <-streamDone: case <-streamDone:
// drain any remaining chunks from chunkChan before exiting
for len(chunkChan) > 0 { for len(chunkChan) > 0 {
chunk := <-chunkChan chunk := <-chunkChan
fmt.Fprint(textView, chunk) fmt.Fprint(textView, chunk)
@@ -923,31 +951,40 @@ out:
textView.ScrollToEnd() textView.ScrollToEnd()
} }
if cfg.TTS_ENABLED { if cfg.TTS_ENABLED {
// Send chunk to audio stream handler
TTSTextChan <- chunk TTSTextChan <- chunk
} }
} }
if cfg.TTS_ENABLED { if cfg.TTS_ENABLED {
// msg is done; flush it down
TTSFlushChan <- true TTSFlushChan <- true
} }
break out break out
} }
} }
var msgStats *models.ResponseStats
if lastRespStats != nil {
msgStats = &models.ResponseStats{
Tokens: lastRespStats.Tokens,
Duration: lastRespStats.Duration,
TokensPerSec: lastRespStats.TokensPerSec,
}
lastRespStats = nil
}
botRespMode = false botRespMode = false
// numbers in chatbody and displayed must be the same
if r.Resume { if r.Resume {
chatBody.Messages[len(chatBody.Messages)-1].Content += respText.String() chatBody.Messages[len(chatBody.Messages)-1].Content += respText.String()
// lastM.Content = lastM.Content + respText.String()
// Process the updated message to check for known_to tags in resumed response
updatedMsg := chatBody.Messages[len(chatBody.Messages)-1] updatedMsg := chatBody.Messages[len(chatBody.Messages)-1]
processedMsg := processMessageTag(&updatedMsg) processedMsg := processMessageTag(&updatedMsg)
chatBody.Messages[len(chatBody.Messages)-1] = *processedMsg chatBody.Messages[len(chatBody.Messages)-1] = *processedMsg
if msgStats != nil && chatBody.Messages[len(chatBody.Messages)-1].Role != cfg.ToolRole {
chatBody.Messages[len(chatBody.Messages)-1].Stats = msgStats
}
} else { } else {
// Message was already added at the start, just process it for known_to tags
chatBody.Messages[msgIdx].Content = respText.String() chatBody.Messages[msgIdx].Content = respText.String()
processedMsg := processMessageTag(&chatBody.Messages[msgIdx]) processedMsg := processMessageTag(&chatBody.Messages[msgIdx])
chatBody.Messages[msgIdx] = *processedMsg chatBody.Messages[msgIdx] = *processedMsg
if msgStats != nil && chatBody.Messages[msgIdx].Role != cfg.ToolRole {
chatBody.Messages[msgIdx].Stats = msgStats
}
stopTTSIfNotForUser(&chatBody.Messages[msgIdx]) stopTTSIfNotForUser(&chatBody.Messages[msgIdx])
} }
cleanChatBody() cleanChatBody()

View File

@@ -105,12 +105,13 @@ type ImageContentPart struct {
// RoleMsg represents a message with content that can be either a simple string or structured content parts // RoleMsg represents a message with content that can be either a simple string or structured content parts
type RoleMsg struct { type RoleMsg struct {
Role string `json:"role"` Role string `json:"role"`
Content string `json:"-"` Content string `json:"-"`
ContentParts []any `json:"-"` ContentParts []any `json:"-"`
ToolCallID string `json:"tool_call_id,omitempty"` // For tool response messages ToolCallID string `json:"tool_call_id,omitempty"` // For tool response messages
KnownTo []string `json:"known_to,omitempty"` KnownTo []string `json:"known_to,omitempty"`
hasContentParts bool // Flag to indicate which content type to marshal Stats *ResponseStats `json:"-"` // Display-only, not persisted
hasContentParts bool // Flag to indicate which content type to marshal
} }
// MarshalJSON implements custom JSON marshaling for RoleMsg // MarshalJSON implements custom JSON marshaling for RoleMsg
@@ -183,13 +184,11 @@ func (m *RoleMsg) UnmarshalJSON(data []byte) error {
} }
func (m *RoleMsg) ToText(i int) string { func (m *RoleMsg) ToText(i int) string {
// Convert content to string representation
var contentStr string var contentStr string
var imageIndicators []string var imageIndicators []string
if !m.hasContentParts { if !m.hasContentParts {
contentStr = m.Content contentStr = m.Content
} else { } else {
// For structured content, collect text parts and image indicators
var textParts []string var textParts []string
for _, part := range m.ContentParts { for _, part := range m.ContentParts {
switch p := part.(type) { switch p := part.(type) {
@@ -198,7 +197,6 @@ func (m *RoleMsg) ToText(i int) string {
textParts = append(textParts, p.Text) textParts = append(textParts, p.Text)
} }
case ImageContentPart: case ImageContentPart:
// Collect image indicator
displayPath := p.Path displayPath := p.Path
if displayPath == "" { if displayPath == "" {
displayPath = "image" displayPath = "image"
@@ -216,7 +214,6 @@ func (m *RoleMsg) ToText(i int) string {
} }
} }
case "image_url": case "image_url":
// Handle unmarshaled image content
var displayPath string var displayPath string
if pathVal, pathExists := p["path"]; pathExists { if pathVal, pathExists := p["path"]; pathExists {
if pathStr, isStr := pathVal.(string); isStr && pathStr != "" { if pathStr, isStr := pathVal.(string); isStr && pathStr != "" {
@@ -233,23 +230,20 @@ func (m *RoleMsg) ToText(i int) string {
} }
contentStr = strings.Join(textParts, " ") + " " contentStr = strings.Join(textParts, " ") + " "
} }
// check if already has role annotation (/completion makes them)
// in that case remove it, and then add to icon
// since icon and content are separated by \n
contentStr, _ = strings.CutPrefix(contentStr, m.Role+":") contentStr, _ = strings.CutPrefix(contentStr, m.Role+":")
// if !strings.HasPrefix(contentStr, m.Role+":") {
icon := fmt.Sprintf("(%d) <%s>: ", i, m.Role) icon := fmt.Sprintf("(%d) <%s>: ", i, m.Role)
// }
// Build final message with image indicators before text
var finalContent strings.Builder var finalContent strings.Builder
if len(imageIndicators) > 0 { if len(imageIndicators) > 0 {
// Add each image indicator on its own line
for _, indicator := range imageIndicators { for _, indicator := range imageIndicators {
finalContent.WriteString(indicator) finalContent.WriteString(indicator)
finalContent.WriteString("\n") finalContent.WriteString("\n")
} }
} }
finalContent.WriteString(contentStr) finalContent.WriteString(contentStr)
if m.Stats != nil {
finalContent.WriteString(fmt.Sprintf("\n[gray::i][%d tok, %.1fs, %.1f t/s][-:-:-]",
m.Stats.Tokens, m.Stats.Duration, m.Stats.TokensPerSec))
}
textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, finalContent.String()) textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, finalContent.String())
return strings.ReplaceAll(textMsg, "\n\n", "\n") return strings.ReplaceAll(textMsg, "\n\n", "\n")
} }
@@ -331,6 +325,7 @@ func (m *RoleMsg) Copy() RoleMsg {
ContentParts: m.ContentParts, ContentParts: m.ContentParts,
ToolCallID: m.ToolCallID, ToolCallID: m.ToolCallID,
KnownTo: m.KnownTo, KnownTo: m.KnownTo,
Stats: m.Stats,
hasContentParts: m.hasContentParts, hasContentParts: m.hasContentParts,
} }
} }
@@ -643,6 +638,12 @@ func (lcp *LCPModels) ListModels() []string {
return resp return resp
} }
type ResponseStats struct {
Tokens int
Duration float64
TokensPerSec float64
}
type ChatRoundReq struct { type ChatRoundReq struct {
UserMsg string UserMsg string
Role string Role string