Feat: OpenRouter reasoning support

2026-02-21 16:26:13 +03:00
parent 85b11fa9ff
commit 96ffbd5cf5
7 changed files with 69 additions and 21 deletions
--- a/agent/request.go
+++ b/agent/request.go
@@ -110,8 +110,8 @@ func (ag *AgentClient) buildRequest(sysprompt, msg string) ([]byte, error) {
 			req := models.NewDSChatReq(*chatBody)
 			return json.Marshal(req)
 		case isOpenRouter:
-			// OpenRouter chat
-			req := models.NewOpenRouterChatReq(*chatBody, defaultProps)
+			// OpenRouter chat - agents pass an empty reasoning effort, so no reasoning config is sent
+			req := models.NewOpenRouterChatReq(*chatBody, defaultProps, "")
 			return json.Marshal(req)
 		default:
 			// Assume llama.cpp chat (OpenAI format)
--- a/bot.go
+++ b/bot.go
@@ -573,6 +573,9 @@ func sendMsgToLLM(body io.Reader) {
 	defer resp.Body.Close()
 	reader := bufio.NewReader(resp.Body)
 	counter := uint32(0)
+	reasoningBuffer := strings.Builder{}
+	hasReasoning := false
+	reasoningSent := false
 	for {
 		var (
 			answerText string
@@ -645,6 +648,13 @@ func sendMsgToLLM(body io.Reader) {
 		// 	break
 		// }
 		if chunk.Finished {
+			// Send any remaining reasoning if not already sent
+			if hasReasoning && !reasoningSent {
+				reasoningText := "<think>" + reasoningBuffer.String() + "</think>"
+				answerText = strings.ReplaceAll(reasoningText, "\n\n", "\n")
+				chunkChan <- answerText
+				reasoningSent = true
+			}
 			if chunk.Chunk != "" {
 				logger.Warn("text inside of finish llmchunk", "chunk", chunk, "counter", counter)
 				answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
@@ -656,6 +666,20 @@ func sendMsgToLLM(body io.Reader) {
 		if counter == 0 {
 			chunk.Chunk = strings.TrimPrefix(chunk.Chunk, " ")
 		}
+		// Handle reasoning chunks - buffer them and prepend when content starts
+		if chunk.Reasoning != "" && !reasoningSent {
+			reasoningBuffer.WriteString(chunk.Reasoning)
+			hasReasoning = true
+		}
+
+		// When we get content and have buffered reasoning, send reasoning first
+		if chunk.Chunk != "" && hasReasoning && !reasoningSent {
+			reasoningText := "<think>" + reasoningBuffer.String() + "</think>"
+			answerText = strings.ReplaceAll(reasoningText, "\n\n", "\n")
+			chunkChan <- answerText
+			reasoningSent = true
+		}
+
 		// bot sends way too many \n
 		answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
 		// Accumulate text to check for stop strings that might span across chunks
@@ -666,7 +690,9 @@ func sendMsgToLLM(body io.Reader) {
 			logger.Debug("stop string detected on client side for completion endpoint", "stop_string", answerText)
 			streamDone <- true
 		}
+		if answerText != "" {
 			chunkChan <- answerText
+		}
 		openAIToolChan <- chunk.ToolChunk
 		if chunk.FuncName != "" {
 			lastToolCall.Name = chunk.FuncName
--- a/config.example.toml
+++ b/config.example.toml
@@ -50,3 +50,7 @@ CharSpecificContextEnabled = true
 CharSpecificContextTag = "@"
 AutoTurn = true
 StripThinkingFromAPI = true  # Strip <think> blocks from messages before sending to LLM (keeps them in chat history)
+# OpenRouter reasoning configuration (only applies to OpenRouter chat API)
+# Valid values: xhigh, high, medium, low, minimal, none (empty or none = disabled)
+# When a model returns reasoning, the client wraps it in <think> tags and emits it before the answer text
+ReasoningEffort = "medium"
--- a/config/config.go
+++ b/config/config.go
@@ -20,6 +20,7 @@ type Config struct {
 	ToolUse                       bool   `toml:"ToolUse"`
 	ThinkUse                      bool   `toml:"ThinkUse"`
 	StripThinkingFromAPI          bool   `toml:"StripThinkingFromAPI"`
+	ReasoningEffort               string `toml:"ReasoningEffort"`
 	AssistantRole                 string `toml:"AssistantRole"`
 	SysDir                        string `toml:"SysDir"`
 	ChunkLimit                    uint32 `toml:"ChunkLimit"`
--- a/llm.go
+++ b/llm.go
@@ -614,12 +614,14 @@ func (or OpenRouterChat) ParseChunk(data []byte) (*models.TextChunk, error) {
 		logger.Error("failed to decode", "error", err, "line", string(data))
 		return nil, err
 	}
+	lastChoice := llmchunk.Choices[len(llmchunk.Choices)-1]
 	resp := &models.TextChunk{
-		Chunk: llmchunk.Choices[len(llmchunk.Choices)-1].Delta.Content,
+		Chunk:     lastChoice.Delta.Content,
+		Reasoning: lastChoice.Delta.Reasoning,
 	}
 	// Handle tool calls similar to LCPChat
-	if len(llmchunk.Choices[len(llmchunk.Choices)-1].Delta.ToolCalls) > 0 {
-		toolCall := llmchunk.Choices[len(llmchunk.Choices)-1].Delta.ToolCalls[0]
+	if len(lastChoice.Delta.ToolCalls) > 0 {
+		toolCall := lastChoice.Delta.ToolCalls[0]
 		resp.ToolChunk = toolCall.Function.Arguments
 		fname := toolCall.Function.Name
 		if fname != "" {
@@ -631,7 +633,7 @@ func (or OpenRouterChat) ParseChunk(data []byte) (*models.TextChunk, error) {
 	if resp.ToolChunk != "" {
 		resp.ToolResp = true
 	}
-	if llmchunk.Choices[len(llmchunk.Choices)-1].FinishReason == "stop" {
+	if lastChoice.FinishReason == "stop" {
 		if resp.Chunk != "" {
 			logger.Error("text inside of finish llmchunk", "chunk", llmchunk)
 		}
@@ -710,7 +712,7 @@ func (or OpenRouterChat) FormMsg(msg, role string, resume bool) (io.Reader, erro
 	}
 	// Clean null/empty messages to prevent API issues
 	bodyCopy.Messages = consolidateAssistantMessages(bodyCopy.Messages)
-	orBody := models.NewOpenRouterChatReq(*bodyCopy, defaultLCPProps)
+	orBody := models.NewOpenRouterChatReq(*bodyCopy, defaultLCPProps, cfg.ReasoningEffort)
 	if cfg.ToolUse && !resume && role != cfg.ToolRole {
 		orBody.Tools = baseTools // set tools to use
 	}
--- a/models/models.go
+++ b/models/models.go
@@ -86,6 +86,7 @@ type TextChunk struct {
 	ToolResp  bool
 	FuncName  string
 	ToolID    string
+	Reasoning string // For models that send reasoning separately (OpenRouter, etc.)
 }

 type TextContentPart struct {
--- a/models/openrouter.go
+++ b/models/openrouter.go
@@ -32,10 +32,16 @@ type OpenRouterChatReq struct {
 	MinP        float32          `json:"min_p"`
 	NPredict    int32            `json:"max_tokens"`
 	Tools       []Tool           `json:"tools"`
+	Reasoning   *ReasoningConfig `json:"reasoning,omitempty"`
 }

-func NewOpenRouterChatReq(cb ChatBody, props map[string]float32) OpenRouterChatReq {
-	return OpenRouterChatReq{
+type ReasoningConfig struct {
+	Effort  string `json:"effort,omitempty"`  // xhigh, high, medium, low, minimal, none
+	Summary string `json:"summary,omitempty"` // auto, concise, detailed
+}
+
+func NewOpenRouterChatReq(cb ChatBody, props map[string]float32, reasoningEffort string) OpenRouterChatReq {
+	req := OpenRouterChatReq{
 		Messages:    cb.Messages,
 		Model:       cb.Model,
 		Stream:      cb.Stream,
@@ -43,6 +49,13 @@ func NewOpenRouterChatReq(cb ChatBody, props map[string]float32) OpenRouterChatR
 		MinP:        props["min_p"],
 		NPredict:    int32(props["n_predict"]),
 	}
+	// Only include reasoning config if effort is specified and not "none"
+	if reasoningEffort != "" && reasoningEffort != "none" {
+		req.Reasoning = &ReasoningConfig{
+			Effort: reasoningEffort,
+		}
+	}
+	return req
 }

 type OpenRouterChatRespNonStream struct {
@@ -82,6 +95,7 @@ type OpenRouterChatResp struct {
 		Delta struct {
 			Role      string          `json:"role"`
 			Content   string          `json:"content"`
+			Reasoning string          `json:"reasoning"`
 			ToolCalls []ToolDeltaResp `json:"tool_calls"`
 		} `json:"delta"`
 		FinishReason       string `json:"finish_reason"`