4 Commits

Author SHA1 Message Date
Grail Finder
2c694e2b2b Enha: add (loaded) suffix if model is loaded 2026-02-21 20:42:43 +03:00
Grail Finder
66ccb7a732 Fix: collapsing thinking while thinking 2026-02-21 20:28:12 +03:00
Grail Finder
deece322ef Fix: collapse live thinking removing role 2026-02-21 20:24:15 +03:00
Grail Finder
e7c8fef32d Feat: collapse thinking during gen 2026-02-21 17:10:58 +03:00
2 changed files with 124 additions and 26 deletions

136
bot.go
View File

@@ -403,6 +403,23 @@ func fetchLCPModels() ([]string, error) {
return localModels, nil return localModels, nil
} }
// fetchLCPModelsWithLoadStatus lists the local llama.cpp model names,
// appending a " (loaded)" suffix to every model whose status reports it
// as currently loaded.
func fetchLCPModelsWithLoadStatus() ([]string, error) {
	resp, err := fetchLCPModelsWithStatus()
	if err != nil {
		return nil, err
	}
	names := make([]string, len(resp.Data))
	for i, entry := range resp.Data {
		name := entry.ID
		if entry.Status.Value == "loaded" {
			name += " (loaded)"
		}
		names[i] = name
	}
	return names, nil
}
// fetchLCPModelsWithStatus returns the full LCPModels struct including status information. // fetchLCPModelsWithStatus returns the full LCPModels struct including status information.
func fetchLCPModelsWithStatus() (*models.LCPModels, error) { func fetchLCPModelsWithStatus() (*models.LCPModels, error) {
resp, err := http.Get(cfg.FetchModelNameAPI) resp, err := http.Get(cfg.FetchModelNameAPI)
@@ -573,7 +590,6 @@ func sendMsgToLLM(body io.Reader) {
defer resp.Body.Close() defer resp.Body.Close()
reader := bufio.NewReader(resp.Body) reader := bufio.NewReader(resp.Body)
counter := uint32(0) counter := uint32(0)
reasoningBuffer := strings.Builder{}
hasReasoning := false hasReasoning := false
reasoningSent := false reasoningSent := false
for { for {
@@ -648,11 +664,9 @@ func sendMsgToLLM(body io.Reader) {
// break // break
// } // }
if chunk.Finished { if chunk.Finished {
// Send any remaining reasoning if not already sent // Close the thinking block if we were streaming reasoning and haven't closed it yet
if hasReasoning && !reasoningSent { if hasReasoning && !reasoningSent {
reasoningText := "<think>" + reasoningBuffer.String() + "</think>" chunkChan <- "</think>"
answerText = strings.ReplaceAll(reasoningText, "\n\n", "\n")
chunkChan <- answerText
} }
if chunk.Chunk != "" { if chunk.Chunk != "" {
logger.Warn("text inside of finish llmchunk", "chunk", chunk, "counter", counter) logger.Warn("text inside of finish llmchunk", "chunk", chunk, "counter", counter)
@@ -665,17 +679,24 @@ func sendMsgToLLM(body io.Reader) {
if counter == 0 { if counter == 0 {
chunk.Chunk = strings.TrimPrefix(chunk.Chunk, " ") chunk.Chunk = strings.TrimPrefix(chunk.Chunk, " ")
} }
// Handle reasoning chunks - buffer them and prepend when content starts // Handle reasoning chunks - stream them immediately as they arrive
if chunk.Reasoning != "" && !reasoningSent { if chunk.Reasoning != "" && !reasoningSent {
reasoningBuffer.WriteString(chunk.Reasoning) if !hasReasoning {
// First reasoning chunk - send opening tag
chunkChan <- "<think>"
hasReasoning = true hasReasoning = true
} }
// Stream reasoning content immediately
// When we get content and have buffered reasoning, send reasoning first answerText = strings.ReplaceAll(chunk.Reasoning, "\n\n", "\n")
if chunk.Chunk != "" && hasReasoning && !reasoningSent { if answerText != "" {
reasoningText := "<think>" + reasoningBuffer.String() + "</think>"
answerText = strings.ReplaceAll(reasoningText, "\n\n", "\n")
chunkChan <- answerText chunkChan <- answerText
}
}
// When we get content and have been streaming reasoning, close the thinking block
if chunk.Chunk != "" && hasReasoning && !reasoningSent {
// Close the thinking block before sending actual content
chunkChan <- "</think>"
reasoningSent = true reasoningSent = true
} }
@@ -807,23 +828,78 @@ func chatRound(r *models.ChatRoundReq) error {
} }
go sendMsgToLLM(reader) go sendMsgToLLM(reader)
logger.Debug("looking at vars in chatRound", "msg", r.UserMsg, "regen", r.Regen, "resume", r.Resume) logger.Debug("looking at vars in chatRound", "msg", r.UserMsg, "regen", r.Regen, "resume", r.Resume)
msgIdx := len(chatBody.Messages)
if !r.Resume { if !r.Resume {
fmt.Fprintf(textView, "\n[-:-:b](%d) ", len(chatBody.Messages)) // Add empty message to chatBody immediately so it persists during Alt+T toggle
chatBody.Messages = append(chatBody.Messages, models.RoleMsg{
Role: botPersona, Content: "",
})
fmt.Fprintf(textView, "\n[-:-:b](%d) ", msgIdx)
fmt.Fprint(textView, roleToIcon(botPersona)) fmt.Fprint(textView, roleToIcon(botPersona))
fmt.Fprint(textView, "[-:-:-]\n") fmt.Fprint(textView, "[-:-:-]\n")
if cfg.ThinkUse && !strings.Contains(cfg.CurrentAPI, "v1") { if cfg.ThinkUse && !strings.Contains(cfg.CurrentAPI, "v1") {
// fmt.Fprint(textView, "<think>") // fmt.Fprint(textView, "<think>")
chunkChan <- "<think>" chunkChan <- "<think>"
} }
} else {
msgIdx = len(chatBody.Messages) - 1
} }
respText := strings.Builder{} respText := strings.Builder{}
toolResp := strings.Builder{} toolResp := strings.Builder{}
// Variables for handling thinking blocks during streaming
inThinkingBlock := false
thinkingBuffer := strings.Builder{}
justExitedThinkingCollapsed := false
out: out:
for { for {
select { select {
case chunk := <-chunkChan: case chunk := <-chunkChan:
// Handle thinking blocks during streaming
if strings.HasPrefix(chunk, "<think>") && !inThinkingBlock {
// Start of thinking block
inThinkingBlock = true
thinkingBuffer.Reset()
thinkingBuffer.WriteString(chunk)
if thinkingCollapsed {
// Show placeholder immediately when thinking starts in collapsed mode
fmt.Fprint(textView, "[yellow::i][thinking... (press Alt+T to expand)][-:-:-]")
if scrollToEndEnabled {
textView.ScrollToEnd()
}
respText.WriteString(chunk)
continue
}
} else if inThinkingBlock {
thinkingBuffer.WriteString(chunk)
if strings.Contains(chunk, "</think>") {
// End of thinking block
inThinkingBlock = false
if thinkingCollapsed {
// Thinking already displayed as placeholder, just update respText
respText.WriteString(chunk)
justExitedThinkingCollapsed = true
if scrollToEndEnabled {
textView.ScrollToEnd()
}
continue
}
// If not collapsed, fall through to normal display
} else if thinkingCollapsed {
// Still in thinking block and collapsed - just buffer, don't display
respText.WriteString(chunk)
continue
}
// If not collapsed, fall through to normal display
}
// Add spacing after collapsed thinking block before real response
if justExitedThinkingCollapsed {
chunk = "\n\n" + chunk
justExitedThinkingCollapsed = false
}
fmt.Fprint(textView, chunk) fmt.Fprint(textView, chunk)
respText.WriteString(chunk) respText.WriteString(chunk)
// Update the message in chatBody.Messages so it persists during Alt+T
chatBody.Messages[msgIdx].Content = respText.String()
if scrollToEndEnabled { if scrollToEndEnabled {
textView.ScrollToEnd() textView.ScrollToEnd()
} }
@@ -868,13 +944,11 @@ out:
processedMsg := processMessageTag(&updatedMsg) processedMsg := processMessageTag(&updatedMsg)
chatBody.Messages[len(chatBody.Messages)-1] = *processedMsg chatBody.Messages[len(chatBody.Messages)-1] = *processedMsg
} else { } else {
newMsg := models.RoleMsg{ // Message was already added at the start, just process it for known_to tags
Role: botPersona, Content: respText.String(), chatBody.Messages[msgIdx].Content = respText.String()
} processedMsg := processMessageTag(&chatBody.Messages[msgIdx])
// Process the new message to check for known_to tags in LLM response chatBody.Messages[msgIdx] = *processedMsg
newMsg = *processMessageTag(&newMsg) stopTTSIfNotForUser(&chatBody.Messages[msgIdx])
chatBody.Messages = append(chatBody.Messages, newMsg)
stopTTSIfNotForUser(&newMsg)
} }
cleanChatBody() cleanChatBody()
refreshChatDisplay() refreshChatDisplay()
@@ -1137,8 +1211,26 @@ func chatToText(messages []models.RoleMsg, showSys bool) string {
// Collapse thinking blocks if enabled // Collapse thinking blocks if enabled
if thinkingCollapsed { if thinkingCollapsed {
placeholder := "[yellow::i][thinking... (press Alt+T to expand)][-:-:-]" text = thinkRE.ReplaceAllStringFunc(text, func(match string) string {
text = thinkRE.ReplaceAllString(text, placeholder) // Extract content between <think> and </think>
start := len("<think>")
end := len(match) - len("</think>")
if start < end && start < len(match) {
content := match[start:end]
return fmt.Sprintf("[yellow::i][thinking... (%d chars) (press Alt+T to expand)][-:-:-]", len(content))
}
return "[yellow::i][thinking... (press Alt+T to expand)][-:-:-]"
})
// Handle incomplete thinking blocks (during streaming when </think> hasn't arrived yet)
if strings.Contains(text, "<think>") && !strings.Contains(text, "</think>") {
// Find the incomplete thinking block and replace it
startIdx := strings.Index(text, "<think>")
if startIdx != -1 {
content := text[startIdx+len("<think>"):]
placeholder := fmt.Sprintf("[yellow::i][thinking... (%d chars) (press Alt+T to expand)][-:-:-]", len(content))
text = text[:startIdx] + placeholder
}
}
} }
return text return text

View File

@@ -17,10 +17,14 @@ func showModelSelectionPopup() {
} else if strings.Contains(api, "openrouter.ai") { } else if strings.Contains(api, "openrouter.ai") {
return ORFreeModels return ORFreeModels
} }
// Assume local llama.cpp // Assume local llama.cpp - fetch with load status
updateModelLists() models, err := fetchLCPModelsWithLoadStatus()
if err != nil {
logger.Error("failed to fetch models with load status", "error", err)
return LocalModels return LocalModels
} }
return models
}
// Get the current model list based on the API // Get the current model list based on the API
modelList := getModelListForAPI(cfg.CurrentAPI) modelList := getModelListForAPI(cfg.CurrentAPI)
// Check for empty options list // Check for empty options list
@@ -57,8 +61,10 @@ func showModelSelectionPopup() {
modelListWidget.SetCurrentItem(currentModelIndex) modelListWidget.SetCurrentItem(currentModelIndex)
} }
modelListWidget.SetSelectedFunc(func(index int, mainText string, secondaryText string, shortcut rune) { modelListWidget.SetSelectedFunc(func(index int, mainText string, secondaryText string, shortcut rune) {
// Strip "(loaded)" suffix if present for local llama.cpp models
modelName := strings.TrimSuffix(mainText, " (loaded)")
// Update the model in both chatBody and config // Update the model in both chatBody and config
chatBody.Model = mainText chatBody.Model = modelName
cfg.CurrentModel = chatBody.Model cfg.CurrentModel = chatBody.Model
// Remove the popup page // Remove the popup page
pages.RemovePage("modelSelectionPopup") pages.RemovePage("modelSelectionPopup")