Feat (pull/18994): llama.cpp reasoning

Feat: openrouter reasoning
Chore: status line, linter complaints
2026-02-21 16:31:59 +03:00 · 2026-02-21 16:26:13 +03:00 · 2026-02-21 10:15:36 +03:00
13 changed files with 106 additions and 51 deletions
--- a/agent/request.go
+++ b/agent/request.go
@@ -110,8 +110,8 @@ func (ag *AgentClient) buildRequest(sysprompt, msg string) ([]byte, error) {
 			req := models.NewDSChatReq(*chatBody)
 			return json.Marshal(req)
 		case isOpenRouter:
-			// OpenRouter chat
-			req := models.NewOpenRouterChatReq(*chatBody, defaultProps)
+			// OpenRouter chat - agents don't use reasoning by default
+			req := models.NewOpenRouterChatReq(*chatBody, defaultProps, "")
 			return json.Marshal(req)
 		default:
 			// Assume llama.cpp chat (OpenAI format)
--- a/bot.go
+++ b/bot.go
@@ -573,6 +573,9 @@ func sendMsgToLLM(body io.Reader) {
 	defer resp.Body.Close()
 	reader := bufio.NewReader(resp.Body)
 	counter := uint32(0)
+	reasoningBuffer := strings.Builder{}
+	hasReasoning := false
+	reasoningSent := false
 	for {
 		var (
 			answerText string
@@ -645,6 +648,12 @@ func sendMsgToLLM(body io.Reader) {
 		// 	break
 		// }
 		if chunk.Finished {
+			// Send any remaining reasoning if not already sent
+			if hasReasoning && !reasoningSent {
+				reasoningText := "<think>" + reasoningBuffer.String() + "</think>"
+				answerText = strings.ReplaceAll(reasoningText, "\n\n", "\n")
+				chunkChan <- answerText
+			}
 			if chunk.Chunk != "" {
 				logger.Warn("text inside of finish llmchunk", "chunk", chunk, "counter", counter)
 				answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
@@ -656,6 +665,20 @@ func sendMsgToLLM(body io.Reader) {
 		if counter == 0 {
 			chunk.Chunk = strings.TrimPrefix(chunk.Chunk, " ")
 		}
+		// Handle reasoning chunks - buffer them and prepend when content starts
+		if chunk.Reasoning != "" && !reasoningSent {
+			reasoningBuffer.WriteString(chunk.Reasoning)
+			hasReasoning = true
+		}
+
+		// When we get content and have buffered reasoning, send reasoning first
+		if chunk.Chunk != "" && hasReasoning && !reasoningSent {
+			reasoningText := "<think>" + reasoningBuffer.String() + "</think>"
+			answerText = strings.ReplaceAll(reasoningText, "\n\n", "\n")
+			chunkChan <- answerText
+			reasoningSent = true
+		}
+
 		// bot sends way too many \n
 		answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
 		// Accumulate text to check for stop strings that might span across chunks
@@ -666,7 +689,9 @@ func sendMsgToLLM(body io.Reader) {
 			logger.Debug("stop string detected on client side for completion endpoint", "stop_string", answerText)
 			streamDone <- true
 		}
+		if answerText != "" {
 			chunkChan <- answerText
+		}
 		openAIToolChan <- chunk.ToolChunk
 		if chunk.FuncName != "" {
 			lastToolCall.Name = chunk.FuncName
--- a/config.example.toml
+++ b/config.example.toml
@@ -50,3 +50,7 @@ CharSpecificContextEnabled = true
 CharSpecificContextTag = "@"
 AutoTurn = true
 StripThinkingFromAPI = true  # Strip <think> blocks from messages before sending to LLM (keeps them in chat history)
+# OpenRouter reasoning configuration (only applies to OpenRouter chat API)
+# Valid values: xhigh, high, medium, low, minimal, none (empty or none = no reasoning config is sent with the request)
+# Reasoning text returned by the model is buffered by the client and shown wrapped in <think> tags ahead of the answer
+ReasoningEffort = "medium"
--- a/config/config.go
+++ b/config/config.go
@@ -20,6 +20,7 @@ type Config struct {
 	ToolUse                       bool   `toml:"ToolUse"`
 	ThinkUse                      bool   `toml:"ThinkUse"`
 	StripThinkingFromAPI          bool   `toml:"StripThinkingFromAPI"`
+	ReasoningEffort               string `toml:"ReasoningEffort"`
 	AssistantRole                 string `toml:"AssistantRole"`
 	SysDir                        string `toml:"SysDir"`
 	ChunkLimit                    uint32 `toml:"ChunkLimit"`
--- a/helpfuncs.go
+++ b/helpfuncs.go
@@ -354,10 +354,15 @@ func makeStatusLine() string {
 	}
 	// Get model color based on load status for local llama.cpp models
 	modelColor := getModelColor()
-	statusLine := fmt.Sprintf(indexLineCompletion, boolColors[botRespMode], botRespMode, activeChatName,
+	statusLine := fmt.Sprintf(statusLineTempl, boolColors[botRespMode], botRespMode, activeChatName,
 		boolColors[cfg.ToolUse], cfg.ToolUse, modelColor, chatBody.Model, boolColors[cfg.SkipLLMResp],
 		cfg.SkipLLMResp, cfg.CurrentAPI, boolColors[isRecording], isRecording, persona,
-		botPersona, boolColors[injectRole], injectRole)
+		botPersona)
+	// completion endpoint
+	if !strings.Contains(cfg.CurrentAPI, "chat") {
+		roleInject := fmt.Sprintf(" | role injection [%s:-:b]%v[-:-:-] (alt+7)", boolColors[injectRole], injectRole)
+		statusLine += roleInject
+	}
 	return statusLine + imageInfo + shellModeInfo
 }

@@ -741,7 +746,6 @@ func scanFiles(dir, filter string) []string {
 	const maxDepth = 3
 	const maxFiles = 50
 	var files []string
-
 	var scanRecursive func(currentDir string, currentDepth int, relPath string)
 	scanRecursive = func(currentDir string, currentDepth int, relPath string) {
 		if len(files) >= maxFiles {
@@ -750,39 +754,33 @@ func scanFiles(dir, filter string) []string {
 		if currentDepth > maxDepth {
 			return
 		}
-
 		entries, err := os.ReadDir(currentDir)
 		if err != nil {
 			return
 		}
-
 		for _, entry := range entries {
 			if len(files) >= maxFiles {
 				return
 			}
-
 			name := entry.Name()
 			if strings.HasPrefix(name, ".") {
 				continue
 			}
-
 			fullPath := name
 			if relPath != "" {
 				fullPath = relPath + "/" + name
 			}
-
 			if entry.IsDir() {
 				// Recursively scan subdirectories
 				scanRecursive(filepath.Join(currentDir, name), currentDepth+1, fullPath)
-			} else {
+				continue
+			}
 			// Check if file matches filter
 			if filter == "" || strings.HasPrefix(strings.ToLower(fullPath), strings.ToLower(filter)) {
 				files = append(files, fullPath)
 			}
 		}
 	}
-	}
-
 	scanRecursive(dir, 0, "")
 	return files
 }
--- a/llm.go
+++ b/llm.go
@@ -237,8 +237,10 @@ func (op LCPChat) ParseChunk(data []byte) (*models.TextChunk, error) {
 		return &models.TextChunk{Finished: true}, nil
 	}

+	lastChoice := llmchunk.Choices[len(llmchunk.Choices)-1]
 	resp := &models.TextChunk{
-		Chunk: llmchunk.Choices[len(llmchunk.Choices)-1].Delta.Content,
+		Chunk:     lastChoice.Delta.Content,
+		Reasoning: lastChoice.Delta.ReasoningContent,
 	}

 	// Check for tool calls in all choices, not just the last one
@@ -256,7 +258,7 @@ func (op LCPChat) ParseChunk(data []byte) (*models.TextChunk, error) {
 		}
 	}

-	if llmchunk.Choices[len(llmchunk.Choices)-1].FinishReason == "stop" {
+	if lastChoice.FinishReason == "stop" {
 		if resp.Chunk != "" {
 			logger.Error("text inside of finish llmchunk", "chunk", llmchunk)
 		}
@@ -614,12 +616,14 @@ func (or OpenRouterChat) ParseChunk(data []byte) (*models.TextChunk, error) {
 		logger.Error("failed to decode", "error", err, "line", string(data))
 		return nil, err
 	}
+	lastChoice := llmchunk.Choices[len(llmchunk.Choices)-1]
 	resp := &models.TextChunk{
-		Chunk: llmchunk.Choices[len(llmchunk.Choices)-1].Delta.Content,
+		Chunk:     lastChoice.Delta.Content,
+		Reasoning: lastChoice.Delta.Reasoning,
 	}
 	// Handle tool calls similar to LCPChat
-	if len(llmchunk.Choices[len(llmchunk.Choices)-1].Delta.ToolCalls) > 0 {
-		toolCall := llmchunk.Choices[len(llmchunk.Choices)-1].Delta.ToolCalls[0]
+	if len(lastChoice.Delta.ToolCalls) > 0 {
+		toolCall := lastChoice.Delta.ToolCalls[0]
 		resp.ToolChunk = toolCall.Function.Arguments
 		fname := toolCall.Function.Name
 		if fname != "" {
@@ -631,7 +635,7 @@ func (or OpenRouterChat) ParseChunk(data []byte) (*models.TextChunk, error) {
 	if resp.ToolChunk != "" {
 		resp.ToolResp = true
 	}
-	if llmchunk.Choices[len(llmchunk.Choices)-1].FinishReason == "stop" {
+	if lastChoice.FinishReason == "stop" {
 		if resp.Chunk != "" {
 			logger.Error("text inside of finish llmchunk", "chunk", llmchunk)
 		}
@@ -710,7 +714,7 @@ func (or OpenRouterChat) FormMsg(msg, role string, resume bool) (io.Reader, erro
 	}
 	// Clean null/empty messages to prevent API issues
 	bodyCopy.Messages = consolidateAssistantMessages(bodyCopy.Messages)
-	orBody := models.NewOpenRouterChatReq(*bodyCopy, defaultLCPProps)
+	orBody := models.NewOpenRouterChatReq(*bodyCopy, defaultLCPProps, cfg.ReasoningEffort)
 	if cfg.ToolUse && !resume && role != cfg.ToolRole {
 		orBody.Tools = baseTools // set tools to use
 	}
--- a/main.go
+++ b/main.go
@@ -13,7 +13,7 @@ var (
 	selectedIndex     = int(-1)
 	shellMode         = false
 	thinkingCollapsed = false
-	indexLineCompletion = "F12 to show keys help | llm turn: [%s:-:b]%v[-:-:-] (F6) | chat: [orange:-:b]%s[-:-:-] (F1) | toolUseAdviced: [%s:-:b]%v[-:-:-] (ctrl+k) | model: [%s:-:b]%s[-:-:-] (ctrl+l) | skip LLM resp: [%s:-:b]%v[-:-:-] (F10)\nAPI: [orange:-:b]%s[-:-:-] (ctrl+v) | recording: [%s:-:b]%v[-:-:-] (ctrl+r) | writing as: [orange:-:b]%s[-:-:-] (ctrl+q) | bot will write as [orange:-:b]%s[-:-:-] (ctrl+x) | role injection (alt+7) [%s:-:b]%v[-:-:-]"
+	statusLineTempl   = "help (F12) | llm turn: [%s:-:b]%v[-:-:-] (F6) | chat: [orange:-:b]%s[-:-:-] (F1) |tool-use: [%s:-:b]%v[-:-:-] (ctrl+k) | model: [%s:-:b]%s[-:-:-] (ctrl+l) | skip LLM resp: [%s:-:b]%v[-:-:-] (F10)\nAPI: [orange:-:b]%s[-:-:-] (ctrl+v) | voice recording: [%s:-:b]%v[-:-:-] (ctrl+r) | writing as: [orange:-:b]%s[-:-:-] (ctrl+q) | bot will write as [orange:-:b]%s[-:-:-] (ctrl+x)"
 	focusSwitcher     = map[tview.Primitive]tview.Primitive{}
 )

--- a/models/models.go
+++ b/models/models.go
@@ -65,6 +65,7 @@ type LLMRespChunk struct {
 		Index        int    `json:"index"`
 		Delta        struct {
 			Content          string          `json:"content"`
+			ReasoningContent string          `json:"reasoning_content"`
 			ToolCalls        []ToolDeltaResp `json:"tool_calls"`
 		} `json:"delta"`
 	} `json:"choices"`
@@ -86,6 +87,7 @@ type TextChunk struct {
 	ToolResp  bool
 	FuncName  string
 	ToolID    string
+	Reasoning string // Reasoning text streamed separately from content (OpenRouter "reasoning", llama.cpp "reasoning_content")
 }

 type TextContentPart struct {
--- a/models/openrouter.go
+++ b/models/openrouter.go
@@ -32,10 +32,16 @@ type OpenRouterChatReq struct {
 	MinP        float32          `json:"min_p"`
 	NPredict    int32            `json:"max_tokens"`
 	Tools       []Tool           `json:"tools"`
+	Reasoning   *ReasoningConfig `json:"reasoning,omitempty"`
 }

-func NewOpenRouterChatReq(cb ChatBody, props map[string]float32) OpenRouterChatReq {
-	return OpenRouterChatReq{
+type ReasoningConfig struct {
+	Effort  string `json:"effort,omitempty"`  // xhigh, high, medium, low, minimal, none
+	Summary string `json:"summary,omitempty"` // auto, concise, detailed
+}
+
+func NewOpenRouterChatReq(cb ChatBody, props map[string]float32, reasoningEffort string) OpenRouterChatReq {
+	req := OpenRouterChatReq{
 		Messages:    cb.Messages,
 		Model:       cb.Model,
 		Stream:      cb.Stream,
@@ -43,6 +49,13 @@ func NewOpenRouterChatReq(cb ChatBody, props map[string]float32) OpenRouterChatR
 		MinP:        props["min_p"],
 		NPredict:    int32(props["n_predict"]),
 	}
+	// Only include reasoning config if effort is specified and not "none"
+	if reasoningEffort != "" && reasoningEffort != "none" {
+		req.Reasoning = &ReasoningConfig{
+			Effort: reasoningEffort,
+		}
+	}
+	return req
 }

 type OpenRouterChatRespNonStream struct {
@@ -82,6 +95,7 @@ type OpenRouterChatResp struct {
 		Delta struct {
 			Role      string          `json:"role"`
 			Content   string          `json:"content"`
+			Reasoning string          `json:"reasoning"`
 			ToolCalls []ToolDeltaResp `json:"tool_calls"`
 		} `json:"delta"`
 		FinishReason       string `json:"finish_reason"`
--- a/popups.go
+++ b/popups.go
@@ -388,7 +388,7 @@ func showFileCompletionPopup(filter string) {
 	app.SetFocus(widget)
 }

-func updateWidgetColors(theme tview.Theme) {
+func updateWidgetColors(theme *tview.Theme) {
 	bgColor := theme.PrimitiveBackgroundColor
 	fgColor := theme.PrimaryTextColor
 	borderColor := theme.BorderColor
@@ -476,7 +476,7 @@ func showColorschemeSelectionPopup() {
 			tview.Styles = theme
 			go func() {
 				app.QueueUpdateDraw(func() {
-					updateWidgetColors(theme)
+					updateWidgetColors(&theme)
 				})
 			}()
 		}
--- a/props_table.go
+++ b/props_table.go
@@ -149,6 +149,11 @@ func makePropsTable(props map[string]float32) *tview.Table {
 	addListPopupRow("Set log level", logLevels, GetLogLevel(), func(option string) {
 		setLogLevel(option)
 	})
+	// Add reasoning effort dropdown (the value is only passed to OpenRouter chat requests)
+	reasoningEfforts := []string{"", "none", "minimal", "low", "medium", "high", "xhigh"}
+	addListPopupRow("Reasoning effort (OR)", reasoningEfforts, cfg.ReasoningEffort, func(option string) {
+		cfg.ReasoningEffort = option
+	})
 	// Helper function to get model list for a given API
 	getModelListForAPI := func(api string) []string {
 		if strings.Contains(api, "api.deepseek.com/") {
--- a/tables.go
+++ b/tables.go
@@ -1046,6 +1046,7 @@ func makeFilePicker() *tview.Flex {
 						if bracketPos := strings.Index(itemText, " ["); bracketPos != -1 {
 							actualItemName = itemText[:bracketPos]
 						}
+						// nolint: gocritic
 						if strings.HasPrefix(actualItemName, "../") {
 							targetDir = path.Dir(currentDisplayDir)
 						} else if strings.HasSuffix(actualItemName, "/") {
--- a/tui.go
+++ b/tui.go
@@ -835,6 +835,7 @@ func init() {
 				lastMsg := chatBody.Messages[len(chatBody.Messages)-1]
 				cleanedText := models.CleanText(lastMsg.Content)
 				if cleanedText != "" {
+					// nolint: errcheck
 					go orator.Speak(cleanedText)
 				}
 			}
Author	SHA1	Message	Date
Grail Finder	eedda0ec4b	Feat (pull/18994): llama.cpp reasoning	2026-02-21 16:31:59 +03:00
Grail Finder	96ffbd5cf5	Feat: openrouter reasoning	2026-02-21 16:26:13 +03:00
Grail Finder	85b11fa9ff	Chore: status line, linter complaints	2026-02-21 10:15:36 +03:00