Enha: json tag for stats

Enha: defer finalizeRespStats
Feat: show stats
2026-02-23 09:35:40 +03:00 · 2026-02-23 09:30:37 +03:00 · 2026-02-23 09:18:19 +03:00
2 changed files with 65 additions and 35 deletions
--- a/bot.go
+++ b/bot.go
@@ -46,6 +46,7 @@ var (
 	ragger          *rag.RAG
 	chunkParser     ChunkParser
 	lastToolCall    *models.FuncCall
+	lastRespStats   *models.ResponseStats
 	//nolint:unused // TTS_ENABLED conditionally uses this
 	orator          Orator
 	asr             STT
@@ -484,30 +485,28 @@ func monitorModelLoad(modelID string) {
 // extractDetailedErrorFromBytes extracts detailed error information from response body bytes
 func extractDetailedErrorFromBytes(body []byte, statusCode int) string {
 	// Try to parse as JSON to extract detailed error information
-	var errorResponse map[string]interface{}
+	var errorResponse map[string]any
 	if err := json.Unmarshal(body, &errorResponse); err == nil {
 		// Check if it's an error response with detailed information
 		if errorData, ok := errorResponse["error"]; ok {
-			if errorMap, ok := errorData.(map[string]interface{}); ok {
+			if errorMap, ok := errorData.(map[string]any); ok {
 				var errorMsg string
 				if msg, ok := errorMap["message"]; ok {
 					errorMsg = fmt.Sprintf("%v", msg)
 				}
-
 				var details []string
 				if code, ok := errorMap["code"]; ok {
 					details = append(details, fmt.Sprintf("Code: %v", code))
 				}
-
 				if metadata, ok := errorMap["metadata"]; ok {
 					// Handle metadata which might contain raw error details
-					if metadataMap, ok := metadata.(map[string]interface{}); ok {
+					if metadataMap, ok := metadata.(map[string]any); ok {
 						if raw, ok := metadataMap["raw"]; ok {
 							// Parse the raw error string if it's JSON
-							var rawError map[string]interface{}
+							var rawError map[string]any
 							if rawStr, ok := raw.(string); ok && json.Unmarshal([]byte(rawStr), &rawError) == nil {
 								if rawErrorData, ok := rawError["error"]; ok {
-									if rawErrorMap, ok := rawErrorData.(map[string]interface{}); ok {
+									if rawErrorMap, ok := rawErrorData.(map[string]any); ok {
 										if rawMsg, ok := rawErrorMap["message"]; ok {
 											return fmt.Sprintf("API Error: %s", rawMsg)
 										}
@@ -518,20 +517,30 @@ func extractDetailedErrorFromBytes(body []byte, statusCode int) string {
 					}
 					details = append(details, fmt.Sprintf("Metadata: %v", metadata))
 				}
-
 				if len(details) > 0 {
 					return fmt.Sprintf("API Error: %s (%s)", errorMsg, strings.Join(details, ", "))
 				}
-
 				return "API Error: " + errorMsg
 			}
 		}
 	}
-
 	// If not a structured error response, return the raw body with status
 	return fmt.Sprintf("HTTP Status: %d, Response Body: %s", statusCode, string(body))
 }

+func finalizeRespStats(tokenCount int, startTime time.Time) { // finalizeRespStats publishes the stats of the response that began at startTime into the package-level lastRespStats.
+	duration := time.Since(startTime).Seconds() // elapsed time since startTime, in seconds
+	var tps float64 // remains 0 unless duration is positive
+	if duration > 0 { // avoids dividing by zero
+		tps = float64(tokenCount) / duration
+	}
+	lastRespStats = &models.ResponseStats{ // replaces whatever value was stored before
+		Tokens:       tokenCount, // as counted by the caller; in the visible hunks sendMsgToLLM increments it once per chunk it forwards
+		Duration:     duration,
+		TokensPerSec: tps,
+	}
+}
+
 // sendMsgToLLM expects streaming resp
 func sendMsgToLLM(body io.Reader) {
 	choseChunkParser()
@@ -586,12 +595,17 @@ func sendMsgToLLM(body io.Reader) {
 		streamDone <- true
 		return
 	}
-
+	//
 	defer resp.Body.Close()
 	reader := bufio.NewReader(resp.Body)
 	counter := uint32(0)
+	tokenCount := 0
+	startTime := time.Now()
 	hasReasoning := false
 	reasoningSent := false
+	defer func() {
+		finalizeRespStats(tokenCount, startTime)
+	}()
 	for {
 		var (
 			answerText string
@@ -667,11 +681,13 @@ func sendMsgToLLM(body io.Reader) {
 			// Close the thinking block if we were streaming reasoning and haven't closed it yet
 			if hasReasoning && !reasoningSent {
 				chunkChan <- "</think>"
+				tokenCount++
 			}
 			if chunk.Chunk != "" {
 				logger.Warn("text inside of finish llmchunk", "chunk", chunk, "counter", counter)
 				answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
 				chunkChan <- answerText
+				tokenCount++
 			}
 			streamDone <- true
 			break
@@ -684,12 +700,14 @@ func sendMsgToLLM(body io.Reader) {
 			if !hasReasoning {
 				// First reasoning chunk - send opening tag
 				chunkChan <- "<think>"
+				tokenCount++
 				hasReasoning = true
 			}
 			// Stream reasoning content immediately
 			answerText = strings.ReplaceAll(chunk.Reasoning, "\n\n", "\n")
 			if answerText != "" {
 				chunkChan <- answerText
+				tokenCount++
 			}
 		}

@@ -697,6 +715,7 @@ func sendMsgToLLM(body io.Reader) {
 		if chunk.Chunk != "" && hasReasoning && !reasoningSent {
 			// Close the thinking block before sending actual content
 			chunkChan <- "</think>"
+			tokenCount++
 			reasoningSent = true
 		}

@@ -709,9 +728,11 @@ func sendMsgToLLM(body io.Reader) {
 			slices.Contains(stopStrings, answerText) {
 			logger.Debug("stop string detected on client side for completion endpoint", "stop_string", answerText)
 			streamDone <- true
+			break
 		}
 		if answerText != "" {
 			chunkChan <- answerText
+			tokenCount++
 		}
 		openAIToolChan <- chunk.ToolChunk
 		if chunk.FuncName != "" {
@@ -914,7 +935,6 @@ out:
 				textView.ScrollToEnd()
 			}
 		case <-streamDone:
-			// drain any remaining chunks from chunkChan before exiting
 			for len(chunkChan) > 0 {
 				chunk := <-chunkChan
 				fmt.Fprint(textView, chunk)
@@ -923,31 +943,40 @@ out:
 					textView.ScrollToEnd()
 				}
 				if cfg.TTS_ENABLED {
-					// Send chunk to audio stream handler
 					TTSTextChan <- chunk
 				}
 			}
 			if cfg.TTS_ENABLED {
-				// msg is done; flush it down
 				TTSFlushChan <- true
 			}
 			break out
 		}
 	}
+	var msgStats *models.ResponseStats
+	if lastRespStats != nil {
+		msgStats = &models.ResponseStats{
+			Tokens:       lastRespStats.Tokens,
+			Duration:     lastRespStats.Duration,
+			TokensPerSec: lastRespStats.TokensPerSec,
+		}
+		lastRespStats = nil
+	}
 	botRespMode = false
-	// numbers in chatbody and displayed must be the same
 	if r.Resume {
 		chatBody.Messages[len(chatBody.Messages)-1].Content += respText.String()
-		// lastM.Content = lastM.Content + respText.String()
-		// Process the updated message to check for known_to tags in resumed response
 		updatedMsg := chatBody.Messages[len(chatBody.Messages)-1]
 		processedMsg := processMessageTag(&updatedMsg)
 		chatBody.Messages[len(chatBody.Messages)-1] = *processedMsg
+		if msgStats != nil && chatBody.Messages[len(chatBody.Messages)-1].Role != cfg.ToolRole {
+			chatBody.Messages[len(chatBody.Messages)-1].Stats = msgStats
+		}
 	} else {
-		// Message was already added at the start, just process it for known_to tags
 		chatBody.Messages[msgIdx].Content = respText.String()
 		processedMsg := processMessageTag(&chatBody.Messages[msgIdx])
 		chatBody.Messages[msgIdx] = *processedMsg
+		if msgStats != nil && chatBody.Messages[msgIdx].Role != cfg.ToolRole {
+			chatBody.Messages[msgIdx].Stats = msgStats
+		}
 		stopTTSIfNotForUser(&chatBody.Messages[msgIdx])
 	}
 	cleanChatBody()
--- a/models/models.go
+++ b/models/models.go
@@ -105,12 +105,13 @@ type ImageContentPart struct {

 // RoleMsg represents a message with content that can be either a simple string or structured content parts
 type RoleMsg struct {
-	Role            string   `json:"role"`
-	Content         string   `json:"-"`
-	ContentParts    []any    `json:"-"`
-	ToolCallID      string   `json:"tool_call_id,omitempty"` // For tool response messages
-	KnownTo         []string `json:"known_to,omitempty"`
-	hasContentParts bool     // Flag to indicate which content type to marshal
+	Role            string         `json:"role"`
+	Content         string         `json:"-"`
+	ContentParts    []any          `json:"-"`
+	ToolCallID      string         `json:"tool_call_id,omitempty"` // For tool response messages
+	KnownTo         []string       `json:"known_to,omitempty"`
+	Stats           *ResponseStats `json:"stats"` // Response stats; nil unless set by the caller. NOTE(review): tag has no omitempty — confirm how the custom MarshalJSON treats a nil value
+	hasContentParts bool           // Flag to indicate which content type to marshal
 }

 // MarshalJSON implements custom JSON marshaling for RoleMsg
@@ -183,13 +184,11 @@ func (m *RoleMsg) UnmarshalJSON(data []byte) error {
 }

 func (m *RoleMsg) ToText(i int) string {
-	// Convert content to string representation
 	var contentStr string
 	var imageIndicators []string
 	if !m.hasContentParts {
 		contentStr = m.Content
 	} else {
-		// For structured content, collect text parts and image indicators
 		var textParts []string
 		for _, part := range m.ContentParts {
 			switch p := part.(type) {
@@ -198,7 +197,6 @@ func (m *RoleMsg) ToText(i int) string {
 					textParts = append(textParts, p.Text)
 				}
 			case ImageContentPart:
-				// Collect image indicator
 				displayPath := p.Path
 				if displayPath == "" {
 					displayPath = "image"
@@ -216,7 +214,6 @@ func (m *RoleMsg) ToText(i int) string {
 							}
 						}
 					case "image_url":
-						// Handle unmarshaled image content
 						var displayPath string
 						if pathVal, pathExists := p["path"]; pathExists {
 							if pathStr, isStr := pathVal.(string); isStr && pathStr != "" {
@@ -233,23 +230,20 @@ func (m *RoleMsg) ToText(i int) string {
 		}
 		contentStr = strings.Join(textParts, " ") + " "
 	}
-	// check if already has role annotation (/completion makes them)
-	// in that case remove it, and then add to icon
-	// since icon and content are separated by \n
 	contentStr, _ = strings.CutPrefix(contentStr, m.Role+":")
-	// if !strings.HasPrefix(contentStr, m.Role+":") {
 	icon := fmt.Sprintf("(%d) <%s>: ", i, m.Role)
-	// }
-	// Build final message with image indicators before text
 	var finalContent strings.Builder
 	if len(imageIndicators) > 0 {
-		// Add each image indicator on its own line
 		for _, indicator := range imageIndicators {
 			finalContent.WriteString(indicator)
 			finalContent.WriteString("\n")
 		}
 	}
 	finalContent.WriteString(contentStr)
+	if m.Stats != nil {
+		finalContent.WriteString(fmt.Sprintf("\n[gray::i][%d tok, %.1fs, %.1f t/s][-:-:-]",
+			m.Stats.Tokens, m.Stats.Duration, m.Stats.TokensPerSec))
+	}
 	textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, finalContent.String())
 	return strings.ReplaceAll(textMsg, "\n\n", "\n")
 }
@@ -331,6 +325,7 @@ func (m *RoleMsg) Copy() RoleMsg {
 		ContentParts:    m.ContentParts,
 		ToolCallID:      m.ToolCallID,
 		KnownTo:         m.KnownTo,
+		Stats:           m.Stats,
 		hasContentParts: m.hasContentParts,
 	}
 }
@@ -643,6 +638,12 @@ func (lcp *LCPModels) ListModels() []string {
 	return resp
 }

+type ResponseStats struct { // ResponseStats holds the figures recorded for one response. NOTE(review): fields carry no json tags, so encoding/json uses the Go field names as keys
+	Tokens       int     // count supplied by whoever builds the value
+	Duration     float64 // seconds
+	TokensPerSec float64 // Tokens divided by Duration; left at 0 when Duration is not positive
+}
+
 type ChatRoundReq struct {
 	UserMsg string
 	Role    string
Author	SHA1	Message	Date
Grail Finder	ef53e9bebe	Enha: json tag for stats	2026-02-23 09:35:40 +03:00
Grail Finder	a546bfe596	Enha: defer finalizeRespStats	2026-02-23 09:30:37 +03:00
Grail Finder	23c21f87bb	Feat: show stats	2026-02-23 09:18:19 +03:00