4 Commits

Author SHA1 Message Date
Grail Finder
2c694e2b2b Enha: add (loaded) suffix if model is loaded 2026-02-21 20:42:43 +03:00
Grail Finder
66ccb7a732 Fix: collapsing thinking while thinking 2026-02-21 20:28:12 +03:00
Grail Finder
deece322ef Fix: collapse live thinking removing role 2026-02-21 20:24:15 +03:00
Grail Finder
e7c8fef32d Feat: collapse thinking during gen 2026-02-21 17:10:58 +03:00
2 changed files with 124 additions and 26 deletions

136
bot.go
View File

@@ -403,6 +403,23 @@ func fetchLCPModels() ([]string, error) {
return localModels, nil return localModels, nil
} }
// fetchLCPModelsWithLoadStatus lists the local llama.cpp model names,
// appending a " (loaded)" suffix to every model whose status reports it
// as currently loaded.
func fetchLCPModelsWithLoadStatus() ([]string, error) {
	resp, err := fetchLCPModelsWithStatus()
	if err != nil {
		return nil, err
	}
	names := make([]string, len(resp.Data))
	for i, entry := range resp.Data {
		name := entry.ID
		if entry.Status.Value == "loaded" {
			name += " (loaded)"
		}
		names[i] = name
	}
	return names, nil
}
// fetchLCPModelsWithStatus returns the full LCPModels struct including status information. // fetchLCPModelsWithStatus returns the full LCPModels struct including status information.
func fetchLCPModelsWithStatus() (*models.LCPModels, error) { func fetchLCPModelsWithStatus() (*models.LCPModels, error) {
resp, err := http.Get(cfg.FetchModelNameAPI) resp, err := http.Get(cfg.FetchModelNameAPI)
@@ -573,7 +590,6 @@ func sendMsgToLLM(body io.Reader) {
defer resp.Body.Close() defer resp.Body.Close()
reader := bufio.NewReader(resp.Body) reader := bufio.NewReader(resp.Body)
counter := uint32(0) counter := uint32(0)
reasoningBuffer := strings.Builder{}
hasReasoning := false hasReasoning := false
reasoningSent := false reasoningSent := false
for { for {
@@ -648,11 +664,9 @@ func sendMsgToLLM(body io.Reader) {
// break // break
// } // }
if chunk.Finished { if chunk.Finished {
// Send any remaining reasoning if not already sent // Close the thinking block if we were streaming reasoning and haven't closed it yet
if hasReasoning && !reasoningSent { if hasReasoning && !reasoningSent {
reasoningText := "<think>" + reasoningBuffer.String() + "</think>" chunkChan <- "</think>"
answerText = strings.ReplaceAll(reasoningText, "\n\n", "\n")
chunkChan <- answerText
} }
if chunk.Chunk != "" { if chunk.Chunk != "" {
logger.Warn("text inside of finish llmchunk", "chunk", chunk, "counter", counter) logger.Warn("text inside of finish llmchunk", "chunk", chunk, "counter", counter)
@@ -665,17 +679,24 @@ func sendMsgToLLM(body io.Reader) {
if counter == 0 { if counter == 0 {
chunk.Chunk = strings.TrimPrefix(chunk.Chunk, " ") chunk.Chunk = strings.TrimPrefix(chunk.Chunk, " ")
} }
// Handle reasoning chunks - buffer them and prepend when content starts // Handle reasoning chunks - stream them immediately as they arrive
if chunk.Reasoning != "" && !reasoningSent { if chunk.Reasoning != "" && !reasoningSent {
reasoningBuffer.WriteString(chunk.Reasoning) if !hasReasoning {
// First reasoning chunk - send opening tag
chunkChan <- "<think>"
hasReasoning = true hasReasoning = true
} }
// Stream reasoning content immediately
// When we get content and have buffered reasoning, send reasoning first answerText = strings.ReplaceAll(chunk.Reasoning, "\n\n", "\n")
if chunk.Chunk != "" && hasReasoning && !reasoningSent { if answerText != "" {
reasoningText := "<think>" + reasoningBuffer.String() + "</think>"
answerText = strings.ReplaceAll(reasoningText, "\n\n", "\n")
chunkChan <- answerText chunkChan <- answerText
}
}
// When we get content and have been streaming reasoning, close the thinking block
if chunk.Chunk != "" && hasReasoning && !reasoningSent {
// Close the thinking block before sending actual content
chunkChan <- "</think>"
reasoningSent = true reasoningSent = true
} }
@@ -807,23 +828,78 @@ func chatRound(r *models.ChatRoundReq) error {
} }
go sendMsgToLLM(reader) go sendMsgToLLM(reader)
logger.Debug("looking at vars in chatRound", "msg", r.UserMsg, "regen", r.Regen, "resume", r.Resume) logger.Debug("looking at vars in chatRound", "msg", r.UserMsg, "regen", r.Regen, "resume", r.Resume)
msgIdx := len(chatBody.Messages)
if !r.Resume { if !r.Resume {
fmt.Fprintf(textView, "\n[-:-:b](%d) ", len(chatBody.Messages)) // Add empty message to chatBody immediately so it persists during Alt+T toggle
chatBody.Messages = append(chatBody.Messages, models.RoleMsg{
Role: botPersona, Content: "",
})
fmt.Fprintf(textView, "\n[-:-:b](%d) ", msgIdx)
fmt.Fprint(textView, roleToIcon(botPersona)) fmt.Fprint(textView, roleToIcon(botPersona))
fmt.Fprint(textView, "[-:-:-]\n") fmt.Fprint(textView, "[-:-:-]\n")
if cfg.ThinkUse && !strings.Contains(cfg.CurrentAPI, "v1") { if cfg.ThinkUse && !strings.Contains(cfg.CurrentAPI, "v1") {
// fmt.Fprint(textView, "<think>") // fmt.Fprint(textView, "<think>")
chunkChan <- "<think>" chunkChan <- "<think>"
} }
} else {
msgIdx = len(chatBody.Messages) - 1
} }
respText := strings.Builder{} respText := strings.Builder{}
toolResp := strings.Builder{} toolResp := strings.Builder{}
// Variables for handling thinking blocks during streaming
inThinkingBlock := false
thinkingBuffer := strings.Builder{}
justExitedThinkingCollapsed := false
out: out:
for { for {
select { select {
case chunk := <-chunkChan: case chunk := <-chunkChan:
// Handle thinking blocks during streaming
if strings.HasPrefix(chunk, "<think>") && !inThinkingBlock {
// Start of thinking block
inThinkingBlock = true
thinkingBuffer.Reset()
thinkingBuffer.WriteString(chunk)
if thinkingCollapsed {
// Show placeholder immediately when thinking starts in collapsed mode
fmt.Fprint(textView, "[yellow::i][thinking... (press Alt+T to expand)][-:-:-]")
if scrollToEndEnabled {
textView.ScrollToEnd()
}
respText.WriteString(chunk)
continue
}
} else if inThinkingBlock {
thinkingBuffer.WriteString(chunk)
if strings.Contains(chunk, "</think>") {
// End of thinking block
inThinkingBlock = false
if thinkingCollapsed {
// Thinking already displayed as placeholder, just update respText
respText.WriteString(chunk)
justExitedThinkingCollapsed = true
if scrollToEndEnabled {
textView.ScrollToEnd()
}
continue
}
// If not collapsed, fall through to normal display
} else if thinkingCollapsed {
// Still in thinking block and collapsed - just buffer, don't display
respText.WriteString(chunk)
continue
}
// If not collapsed, fall through to normal display
}
// Add spacing after collapsed thinking block before real response
if justExitedThinkingCollapsed {
chunk = "\n\n" + chunk
justExitedThinkingCollapsed = false
}
fmt.Fprint(textView, chunk) fmt.Fprint(textView, chunk)
respText.WriteString(chunk) respText.WriteString(chunk)
// Update the message in chatBody.Messages so it persists during Alt+T
chatBody.Messages[msgIdx].Content = respText.String()
if scrollToEndEnabled { if scrollToEndEnabled {
textView.ScrollToEnd() textView.ScrollToEnd()
} }
@@ -868,13 +944,11 @@ out:
processedMsg := processMessageTag(&updatedMsg) processedMsg := processMessageTag(&updatedMsg)
chatBody.Messages[len(chatBody.Messages)-1] = *processedMsg chatBody.Messages[len(chatBody.Messages)-1] = *processedMsg
} else { } else {
newMsg := models.RoleMsg{ // Message was already added at the start, just process it for known_to tags
Role: botPersona, Content: respText.String(), chatBody.Messages[msgIdx].Content = respText.String()
} processedMsg := processMessageTag(&chatBody.Messages[msgIdx])
// Process the new message to check for known_to tags in LLM response chatBody.Messages[msgIdx] = *processedMsg
newMsg = *processMessageTag(&newMsg) stopTTSIfNotForUser(&chatBody.Messages[msgIdx])
chatBody.Messages = append(chatBody.Messages, newMsg)
stopTTSIfNotForUser(&newMsg)
} }
cleanChatBody() cleanChatBody()
refreshChatDisplay() refreshChatDisplay()
@@ -1137,8 +1211,26 @@ func chatToText(messages []models.RoleMsg, showSys bool) string {
// Collapse thinking blocks if enabled // Collapse thinking blocks if enabled
if thinkingCollapsed { if thinkingCollapsed {
placeholder := "[yellow::i][thinking... (press Alt+T to expand)][-:-:-]" text = thinkRE.ReplaceAllStringFunc(text, func(match string) string {
text = thinkRE.ReplaceAllString(text, placeholder) // Extract content between <think> and </think>
start := len("<think>")
end := len(match) - len("</think>")
if start < end && start < len(match) {
content := match[start:end]
return fmt.Sprintf("[yellow::i][thinking... (%d chars) (press Alt+T to expand)][-:-:-]", len(content))
}
return "[yellow::i][thinking... (press Alt+T to expand)][-:-:-]"
})
// Handle incomplete thinking blocks (during streaming when </think> hasn't arrived yet)
if strings.Contains(text, "<think>") && !strings.Contains(text, "</think>") {
// Find the incomplete thinking block and replace it
startIdx := strings.Index(text, "<think>")
if startIdx != -1 {
content := text[startIdx+len("<think>"):]
placeholder := fmt.Sprintf("[yellow::i][thinking... (%d chars) (press Alt+T to expand)][-:-:-]", len(content))
text = text[:startIdx] + placeholder
}
}
} }
return text return text

View File

@@ -17,10 +17,14 @@ func showModelSelectionPopup() {
} else if strings.Contains(api, "openrouter.ai") { } else if strings.Contains(api, "openrouter.ai") {
return ORFreeModels return ORFreeModels
} }
// Assume local llama.cpp // Assume local llama.cpp - fetch with load status
updateModelLists() models, err := fetchLCPModelsWithLoadStatus()
if err != nil {
logger.Error("failed to fetch models with load status", "error", err)
return LocalModels return LocalModels
} }
return models
}
// Get the current model list based on the API // Get the current model list based on the API
modelList := getModelListForAPI(cfg.CurrentAPI) modelList := getModelListForAPI(cfg.CurrentAPI)
// Check for empty options list // Check for empty options list
@@ -57,8 +61,10 @@ func showModelSelectionPopup() {
modelListWidget.SetCurrentItem(currentModelIndex) modelListWidget.SetCurrentItem(currentModelIndex)
} }
modelListWidget.SetSelectedFunc(func(index int, mainText string, secondaryText string, shortcut rune) { modelListWidget.SetSelectedFunc(func(index int, mainText string, secondaryText string, shortcut rune) {
// Strip "(loaded)" suffix if present for local llama.cpp models
modelName := strings.TrimSuffix(mainText, " (loaded)")
// Update the model in both chatBody and config // Update the model in both chatBody and config
chatBody.Model = mainText chatBody.Model = modelName
cfg.CurrentModel = chatBody.Model cfg.CurrentModel = chatBody.Model
// Remove the popup page // Remove the popup page
pages.RemovePage("modelSelectionPopup") pages.RemovePage("modelSelectionPopup")