From 23c21f87bb63e1685cff2395dcd6c5f25e294743 Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Mon, 23 Feb 2026 09:18:19 +0300 Subject: [PATCH] Feat: show stats --- bot.go | 51 +++++++++++++++++++++++++++++++++++++++++------- models/models.go | 35 +++++++++++++++++---------------- 2 files changed, 62 insertions(+), 24 deletions(-) diff --git a/bot.go b/bot.go index f4dbf52..ee04849 100644 --- a/bot.go +++ b/bot.go @@ -46,6 +46,7 @@ var ( ragger *rag.RAG chunkParser ChunkParser lastToolCall *models.FuncCall + lastRespStats *models.ResponseStats //nolint:unused // TTS_ENABLED conditionally uses this orator Orator asr STT @@ -532,6 +533,19 @@ func extractDetailedErrorFromBytes(body []byte, statusCode int) string { return fmt.Sprintf("HTTP Status: %d, Response Body: %s", statusCode, string(body)) } +func finalizeRespStats(tokenCount int, startTime time.Time) { + duration := time.Since(startTime).Seconds() + var tps float64 + if duration > 0 { + tps = float64(tokenCount) / duration + } + lastRespStats = &models.ResponseStats{ + Tokens: tokenCount, + Duration: duration, + TokensPerSec: tps, + } +} + // sendMsgToLLM expects streaming resp func sendMsgToLLM(body io.Reader) { choseChunkParser() @@ -590,6 +604,8 @@ func sendMsgToLLM(body io.Reader) { defer resp.Body.Close() reader := bufio.NewReader(resp.Body) counter := uint32(0) + tokenCount := 0 + startTime := time.Now() hasReasoning := false reasoningSent := false for { @@ -601,6 +617,7 @@ func sendMsgToLLM(body io.Reader) { // to stop from spiriling in infinity read of bad bytes that happens with poor connection if cfg.ChunkLimit > 0 && counter > cfg.ChunkLimit { logger.Warn("response hit chunk limit", "limit", cfg.ChunkLimit) + finalizeRespStats(tokenCount, startTime) streamDone <- true break } @@ -624,6 +641,7 @@ func sendMsgToLLM(body io.Reader) { logger.Error("failed to notify", "error", err) } } + finalizeRespStats(tokenCount, startTime) streamDone <- true break // } @@ -639,6 +657,7 @@ func sendMsgToLLM(body io.Reader) { line = line[6:] logger.Debug("debugging resp", "line", string(line)) if bytes.Equal(line, []byte("[DONE]\n")) { + finalizeRespStats(tokenCount, startTime) streamDone <- true break } @@ -652,6 +671,7 @@ func sendMsgToLLM(body io.Reader) { if err := notifyUser("LLM Response Error", "Failed to parse LLM response: "+err.Error()); err != nil { logger.Error("failed to notify user", "error", err) } + finalizeRespStats(tokenCount, startTime) streamDone <- true break } @@ -667,12 +687,15 @@ func sendMsgToLLM(body io.Reader) { // Close the thinking block if we were streaming reasoning and haven't closed it yet if hasReasoning && !reasoningSent { chunkChan <- "" + tokenCount++ } if chunk.Chunk != "" { logger.Warn("text inside of finish llmchunk", "chunk", chunk, "counter", counter) answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n") chunkChan <- answerText + tokenCount++ } + finalizeRespStats(tokenCount, startTime) streamDone <- true break } @@ -684,12 +707,14 @@ func sendMsgToLLM(body io.Reader) { if !hasReasoning { // First reasoning chunk - send opening tag chunkChan <- "" + tokenCount++ hasReasoning = true } // Stream reasoning content immediately answerText = strings.ReplaceAll(chunk.Reasoning, "\n\n", "\n") if answerText != "" { chunkChan <- answerText + tokenCount++ } } @@ -697,6 +722,7 @@ func sendMsgToLLM(body io.Reader) { if chunk.Chunk != "" && hasReasoning && !reasoningSent { // Close the thinking block before sending actual content chunkChan <- "" + tokenCount++ reasoningSent = true } @@ -708,10 +734,12 @@ func sendMsgToLLM(body io.Reader) { if chunkParser.GetAPIType() == models.APITypeCompletion && slices.Contains(stopStrings, answerText) { logger.Debug("stop string detected on client side for completion endpoint", "stop_string", answerText) + finalizeRespStats(tokenCount, startTime) streamDone <- true } if answerText != "" { chunkChan <- answerText + tokenCount++ } openAIToolChan <- chunk.ToolChunk if chunk.FuncName != "" { @@ -723,6 +751,7 @@ func sendMsgToLLM(body io.Reader) { if interruptResp { // read bytes, so it would not get into beginning of the next req interruptResp = false logger.Info("interrupted bot response", "chunk_counter", counter) + finalizeRespStats(tokenCount, startTime) streamDone <- true break } @@ -914,7 +943,6 @@ out: textView.ScrollToEnd() } case <-streamDone: - // drain any remaining chunks from chunkChan before exiting for len(chunkChan) > 0 { chunk := <-chunkChan fmt.Fprint(textView, chunk) @@ -923,31 +951,40 @@ out: textView.ScrollToEnd() } if cfg.TTS_ENABLED { - // Send chunk to audio stream handler TTSTextChan <- chunk } } if cfg.TTS_ENABLED { - // msg is done; flush it down TTSFlushChan <- true } break out } } + var msgStats *models.ResponseStats + if lastRespStats != nil { + msgStats = &models.ResponseStats{ + Tokens: lastRespStats.Tokens, + Duration: lastRespStats.Duration, + TokensPerSec: lastRespStats.TokensPerSec, + } + lastRespStats = nil + } botRespMode = false - // numbers in chatbody and displayed must be the same if r.Resume { chatBody.Messages[len(chatBody.Messages)-1].Content += respText.String() - // lastM.Content = lastM.Content + respText.String() - // Process the updated message to check for known_to tags in resumed response updatedMsg := chatBody.Messages[len(chatBody.Messages)-1] processedMsg := processMessageTag(&updatedMsg) chatBody.Messages[len(chatBody.Messages)-1] = *processedMsg + if msgStats != nil && chatBody.Messages[len(chatBody.Messages)-1].Role != cfg.ToolRole { + chatBody.Messages[len(chatBody.Messages)-1].Stats = msgStats + } } else { - // Message was already added at the start, just process it for known_to tags chatBody.Messages[msgIdx].Content = respText.String() processedMsg := processMessageTag(&chatBody.Messages[msgIdx]) chatBody.Messages[msgIdx] = *processedMsg + if msgStats != nil && chatBody.Messages[msgIdx].Role != cfg.ToolRole { + chatBody.Messages[msgIdx].Stats = msgStats + } stopTTSIfNotForUser(&chatBody.Messages[msgIdx]) } cleanChatBody() diff --git a/models/models.go b/models/models.go index c5e9435..1e65b25 100644 --- a/models/models.go +++ b/models/models.go @@ -105,12 +105,13 @@ type ImageContentPart struct { // RoleMsg represents a message with content that can be either a simple string or structured content parts type RoleMsg struct { - Role string `json:"role"` - Content string `json:"-"` - ContentParts []any `json:"-"` - ToolCallID string `json:"tool_call_id,omitempty"` // For tool response messages - KnownTo []string `json:"known_to,omitempty"` - hasContentParts bool // Flag to indicate which content type to marshal + Role string `json:"role"` + Content string `json:"-"` + ContentParts []any `json:"-"` + ToolCallID string `json:"tool_call_id,omitempty"` // For tool response messages + KnownTo []string `json:"known_to,omitempty"` + Stats *ResponseStats `json:"-"` // Display-only, not persisted + hasContentParts bool // Flag to indicate which content type to marshal } // MarshalJSON implements custom JSON marshaling for RoleMsg @@ -183,13 +184,11 @@ func (m *RoleMsg) UnmarshalJSON(data []byte) error { } func (m *RoleMsg) ToText(i int) string { - // Convert content to string representation var contentStr string var imageIndicators []string if !m.hasContentParts { contentStr = m.Content } else { - // For structured content, collect text parts and image indicators var textParts []string for _, part := range m.ContentParts { switch p := part.(type) { @@ -198,7 +197,6 @@ func (m *RoleMsg) ToText(i int) string { textParts = append(textParts, p.Text) } case ImageContentPart: - // Collect image indicator displayPath := p.Path if displayPath == "" { displayPath = "image" @@ -216,7 +214,6 @@ func (m *RoleMsg) ToText(i int) string { } } case "image_url": - // Handle unmarshaled image content var displayPath string if pathVal, pathExists := p["path"]; pathExists { if pathStr, isStr := pathVal.(string); isStr && pathStr != "" { @@ -233,23 +230,20 @@ func (m *RoleMsg) ToText(i int) string { } contentStr = strings.Join(textParts, " ") + " " } - // check if already has role annotation (/completion makes them) - // in that case remove it, and then add to icon - // since icon and content are separated by \n contentStr, _ = strings.CutPrefix(contentStr, m.Role+":") - // if !strings.HasPrefix(contentStr, m.Role+":") { icon := fmt.Sprintf("(%d) <%s>: ", i, m.Role) - // } - // Build final message with image indicators before text var finalContent strings.Builder if len(imageIndicators) > 0 { - // Add each image indicator on its own line for _, indicator := range imageIndicators { finalContent.WriteString(indicator) finalContent.WriteString("\n") } } finalContent.WriteString(contentStr) + if m.Stats != nil { + finalContent.WriteString(fmt.Sprintf("\n[gray::i][%d tok, %.1fs, %.1f t/s][-:-:-]", + m.Stats.Tokens, m.Stats.Duration, m.Stats.TokensPerSec)) + } textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, finalContent.String()) return strings.ReplaceAll(textMsg, "\n\n", "\n") } @@ -331,6 +325,7 @@ func (m *RoleMsg) Copy() RoleMsg { ContentParts: m.ContentParts, ToolCallID: m.ToolCallID, KnownTo: m.KnownTo, + Stats: m.Stats, hasContentParts: m.hasContentParts, } } @@ -643,6 +638,12 @@ func (lcp *LCPModels) ListModels() []string { return resp } +type ResponseStats struct { + Tokens int + Duration float64 + TokensPerSec float64 +} + type ChatRoundReq struct { UserMsg string Role string