diff --git a/agent/request.go b/agent/request.go
index 14009dd..c89127f 100644
--- a/agent/request.go
+++ b/agent/request.go
@@ -15,10 +15,10 @@ import (
var httpClient = &http.Client{}
var defaultProps = map[string]float32{
- "temperature": 0.8,
- "dry_multiplier": 0.0,
- "min_p": 0.05,
- "n_predict": -1.0,
+ "temperature": 0.8,
+ "dry_multiplier": 0.0,
+ "min_p": 0.05,
+ "n_predict": -1.0,
}
func detectAPI(api string) (isCompletion, isChat, isDeepSeek, isOpenRouter bool) {
@@ -110,8 +110,8 @@ func (ag *AgentClient) buildRequest(sysprompt, msg string) ([]byte, error) {
req := models.NewDSChatReq(*chatBody)
return json.Marshal(req)
case isOpenRouter:
- // OpenRouter chat
- req := models.NewOpenRouterChatReq(*chatBody, defaultProps)
+ // OpenRouter chat - agents don't use reasoning by default
+ req := models.NewOpenRouterChatReq(*chatBody, defaultProps, "")
return json.Marshal(req)
default:
// Assume llama.cpp chat (OpenAI format)
diff --git a/bot.go b/bot.go
index a2a0d69..f7ba981 100644
--- a/bot.go
+++ b/bot.go
@@ -573,6 +573,9 @@ func sendMsgToLLM(body io.Reader) {
defer resp.Body.Close()
reader := bufio.NewReader(resp.Body)
counter := uint32(0)
+ reasoningBuffer := strings.Builder{}
+ hasReasoning := false
+ reasoningSent := false
for {
var (
answerText string
@@ -645,6 +648,13 @@ func sendMsgToLLM(body io.Reader) {
// break
// }
if chunk.Finished {
+ // Send any remaining reasoning if not already sent
+ if hasReasoning && !reasoningSent {
+ reasoningText := "<think>" + reasoningBuffer.String() + "</think>"
+ answerText = strings.ReplaceAll(reasoningText, "\n\n", "\n")
+ chunkChan <- answerText
+ reasoningSent = true
+ }
if chunk.Chunk != "" {
logger.Warn("text inside of finish llmchunk", "chunk", chunk, "counter", counter)
answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
@@ -656,6 +666,20 @@ func sendMsgToLLM(body io.Reader) {
if counter == 0 {
chunk.Chunk = strings.TrimPrefix(chunk.Chunk, " ")
}
+ // Handle reasoning chunks - buffer them and prepend when content starts
+ if chunk.Reasoning != "" && !reasoningSent {
+ reasoningBuffer.WriteString(chunk.Reasoning)
+ hasReasoning = true
+ }
+
+ // When we get content and have buffered reasoning, send reasoning first
+ if chunk.Chunk != "" && hasReasoning && !reasoningSent {
+ reasoningText := "<think>" + reasoningBuffer.String() + "</think>"
+ answerText = strings.ReplaceAll(reasoningText, "\n\n", "\n")
+ chunkChan <- answerText
+ reasoningSent = true
+ }
+
// bot sends way too many \n
answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
// Accumulate text to check for stop strings that might span across chunks
@@ -666,7 +690,9 @@ func sendMsgToLLM(body io.Reader) {
logger.Debug("stop string detected on client side for completion endpoint", "stop_string", answerText)
streamDone <- true
}
- chunkChan <- answerText
+ if answerText != "" {
+ chunkChan <- answerText
+ }
openAIToolChan <- chunk.ToolChunk
if chunk.FuncName != "" {
lastToolCall.Name = chunk.FuncName
diff --git a/config.example.toml b/config.example.toml
index 3e2ec77..60906da 100644
--- a/config.example.toml
+++ b/config.example.toml
@@ -50,3 +50,7 @@ CharSpecificContextEnabled = true
CharSpecificContextTag = "@"
AutoTurn = true
StripThinkingFromAPI = true # Strip <think> blocks from messages before sending to LLM (keeps them in chat history)
+# OpenRouter reasoning configuration (only applies to OpenRouter chat API)
+# Valid values: xhigh, high, medium, low, minimal, none (empty or none = disabled)
+# Models that support reasoning will include thinking content wrapped in <think> tags
+ReasoningEffort = "medium"
diff --git a/config/config.go b/config/config.go
index 8f1925c..10e43da 100644
--- a/config/config.go
+++ b/config/config.go
@@ -20,6 +20,7 @@ type Config struct {
ToolUse bool `toml:"ToolUse"`
ThinkUse bool `toml:"ThinkUse"`
StripThinkingFromAPI bool `toml:"StripThinkingFromAPI"`
+ ReasoningEffort string `toml:"ReasoningEffort"`
AssistantRole string `toml:"AssistantRole"`
SysDir string `toml:"SysDir"`
ChunkLimit uint32 `toml:"ChunkLimit"`
diff --git a/llm.go b/llm.go
index a648364..bca9655 100644
--- a/llm.go
+++ b/llm.go
@@ -614,12 +614,14 @@ func (or OpenRouterChat) ParseChunk(data []byte) (*models.TextChunk, error) {
logger.Error("failed to decode", "error", err, "line", string(data))
return nil, err
}
+ lastChoice := llmchunk.Choices[len(llmchunk.Choices)-1]
resp := &models.TextChunk{
- Chunk: llmchunk.Choices[len(llmchunk.Choices)-1].Delta.Content,
+ Chunk: lastChoice.Delta.Content,
+ Reasoning: lastChoice.Delta.Reasoning,
}
// Handle tool calls similar to LCPChat
- if len(llmchunk.Choices[len(llmchunk.Choices)-1].Delta.ToolCalls) > 0 {
- toolCall := llmchunk.Choices[len(llmchunk.Choices)-1].Delta.ToolCalls[0]
+ if len(lastChoice.Delta.ToolCalls) > 0 {
+ toolCall := lastChoice.Delta.ToolCalls[0]
resp.ToolChunk = toolCall.Function.Arguments
fname := toolCall.Function.Name
if fname != "" {
@@ -631,7 +633,7 @@ func (or OpenRouterChat) ParseChunk(data []byte) (*models.TextChunk, error) {
if resp.ToolChunk != "" {
resp.ToolResp = true
}
- if llmchunk.Choices[len(llmchunk.Choices)-1].FinishReason == "stop" {
+ if lastChoice.FinishReason == "stop" {
if resp.Chunk != "" {
logger.Error("text inside of finish llmchunk", "chunk", llmchunk)
}
@@ -710,7 +712,7 @@ func (or OpenRouterChat) FormMsg(msg, role string, resume bool) (io.Reader, erro
}
// Clean null/empty messages to prevent API issues
bodyCopy.Messages = consolidateAssistantMessages(bodyCopy.Messages)
- orBody := models.NewOpenRouterChatReq(*bodyCopy, defaultLCPProps)
+ orBody := models.NewOpenRouterChatReq(*bodyCopy, defaultLCPProps, cfg.ReasoningEffort)
if cfg.ToolUse && !resume && role != cfg.ToolRole {
orBody.Tools = baseTools // set tools to use
}
diff --git a/models/models.go b/models/models.go
index 8f42795..b089ecd 100644
--- a/models/models.go
+++ b/models/models.go
@@ -86,6 +86,7 @@ type TextChunk struct {
ToolResp bool
FuncName string
ToolID string
+ Reasoning string // For models that send reasoning separately (OpenRouter, etc.)
}
type TextContentPart struct {
diff --git a/models/openrouter.go b/models/openrouter.go
index 6196498..62709a1 100644
--- a/models/openrouter.go
+++ b/models/openrouter.go
@@ -25,17 +25,23 @@ func NewOpenRouterCompletionReq(model, prompt string, props map[string]float32,
}
type OpenRouterChatReq struct {
- Messages []RoleMsg `json:"messages"`
- Model string `json:"model"`
- Stream bool `json:"stream"`
- Temperature float32 `json:"temperature"`
- MinP float32 `json:"min_p"`
- NPredict int32 `json:"max_tokens"`
- Tools []Tool `json:"tools"`
+ Messages []RoleMsg `json:"messages"`
+ Model string `json:"model"`
+ Stream bool `json:"stream"`
+ Temperature float32 `json:"temperature"`
+ MinP float32 `json:"min_p"`
+ NPredict int32 `json:"max_tokens"`
+ Tools []Tool `json:"tools"`
+ Reasoning *ReasoningConfig `json:"reasoning,omitempty"`
}
-func NewOpenRouterChatReq(cb ChatBody, props map[string]float32) OpenRouterChatReq {
- return OpenRouterChatReq{
+type ReasoningConfig struct {
+ Effort string `json:"effort,omitempty"` // xhigh, high, medium, low, minimal, none
+ Summary string `json:"summary,omitempty"` // auto, concise, detailed
+}
+
+func NewOpenRouterChatReq(cb ChatBody, props map[string]float32, reasoningEffort string) OpenRouterChatReq {
+ req := OpenRouterChatReq{
Messages: cb.Messages,
Model: cb.Model,
Stream: cb.Stream,
@@ -43,6 +49,13 @@ func NewOpenRouterChatReq(cb ChatBody, props map[string]float32) OpenRouterChatR
MinP: props["min_p"],
NPredict: int32(props["n_predict"]),
}
+ // Only include reasoning config if effort is specified and not "none"
+ if reasoningEffort != "" && reasoningEffort != "none" {
+ req.Reasoning = &ReasoningConfig{
+ Effort: reasoningEffort,
+ }
+ }
+ return req
}
type OpenRouterChatRespNonStream struct {
@@ -82,6 +95,7 @@ type OpenRouterChatResp struct {
Delta struct {
Role string `json:"role"`
Content string `json:"content"`
+ Reasoning string `json:"reasoning"`
ToolCalls []ToolDeltaResp `json:"tool_calls"`
} `json:"delta"`
FinishReason string `json:"finish_reason"`