diff --git a/agent/request.go b/agent/request.go index 14009dd..c89127f 100644 --- a/agent/request.go +++ b/agent/request.go @@ -15,10 +15,10 @@ import ( var httpClient = &http.Client{} var defaultProps = map[string]float32{ - "temperature": 0.8, - "dry_multiplier": 0.0, - "min_p": 0.05, - "n_predict": -1.0, + "temperature": 0.8, + "dry_multiplier": 0.0, + "min_p": 0.05, + "n_predict": -1.0, } func detectAPI(api string) (isCompletion, isChat, isDeepSeek, isOpenRouter bool) { @@ -110,8 +110,8 @@ func (ag *AgentClient) buildRequest(sysprompt, msg string) ([]byte, error) { req := models.NewDSChatReq(*chatBody) return json.Marshal(req) case isOpenRouter: - // OpenRouter chat - req := models.NewOpenRouterChatReq(*chatBody, defaultProps) + // OpenRouter chat - agents don't use reasoning by default + req := models.NewOpenRouterChatReq(*chatBody, defaultProps, "") return json.Marshal(req) default: // Assume llama.cpp chat (OpenAI format) diff --git a/bot.go b/bot.go index a2a0d69..f7ba981 100644 --- a/bot.go +++ b/bot.go @@ -573,6 +573,9 @@ func sendMsgToLLM(body io.Reader) { defer resp.Body.Close() reader := bufio.NewReader(resp.Body) counter := uint32(0) + reasoningBuffer := strings.Builder{} + hasReasoning := false + reasoningSent := false for { var ( answerText string @@ -645,6 +648,13 @@ func sendMsgToLLM(body io.Reader) { // break // } if chunk.Finished { + // Send any remaining reasoning if not already sent + if hasReasoning && !reasoningSent { + reasoningText := "<think>" + reasoningBuffer.String() + "</think>" + answerText = strings.ReplaceAll(reasoningText, "\n\n", "\n") + chunkChan <- answerText + reasoningSent = true + } if chunk.Chunk != "" { logger.Warn("text inside of finish llmchunk", "chunk", chunk, "counter", counter) answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n") @@ -656,6 +666,20 @@ func sendMsgToLLM(body io.Reader) { if counter == 0 { chunk.Chunk = strings.TrimPrefix(chunk.Chunk, " ") } + // Handle reasoning chunks - buffer them and prepend when content 
starts + if chunk.Reasoning != "" && !reasoningSent { + reasoningBuffer.WriteString(chunk.Reasoning) + hasReasoning = true + } + + // When we get content and have buffered reasoning, send reasoning first + if chunk.Chunk != "" && hasReasoning && !reasoningSent { + reasoningText := "<think>" + reasoningBuffer.String() + "</think>" + answerText = strings.ReplaceAll(reasoningText, "\n\n", "\n") + chunkChan <- answerText + reasoningSent = true + } + // bot sends way too many \n answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n") // Accumulate text to check for stop strings that might span across chunks @@ -666,7 +690,9 @@ func sendMsgToLLM(body io.Reader) { logger.Debug("stop string detected on client side for completion endpoint", "stop_string", answerText) streamDone <- true } - chunkChan <- answerText + if answerText != "" { + chunkChan <- answerText + } openAIToolChan <- chunk.ToolChunk if chunk.FuncName != "" { lastToolCall.Name = chunk.FuncName diff --git a/config.example.toml b/config.example.toml index 3e2ec77..60906da 100644 --- a/config.example.toml +++ b/config.example.toml @@ -50,3 +50,7 @@ CharSpecificContextEnabled = true CharSpecificContextTag = "@" AutoTurn = true StripThinkingFromAPI = true # Strip <think> blocks from messages before sending to LLM (keeps them in chat history) +# OpenRouter reasoning configuration (only applies to OpenRouter chat API) +# Valid values: xhigh, high, medium, low, minimal, none (empty or none = disabled) +# Models that support reasoning will include thinking content wrapped in <think> tags +ReasoningEffort = "medium" diff --git a/config/config.go b/config/config.go index 8f1925c..10e43da 100644 --- a/config/config.go +++ b/config/config.go @@ -20,6 +20,7 @@ type Config struct { ToolUse bool `toml:"ToolUse"` ThinkUse bool `toml:"ThinkUse"` StripThinkingFromAPI bool `toml:"StripThinkingFromAPI"` + ReasoningEffort string `toml:"ReasoningEffort"` AssistantRole string `toml:"AssistantRole"` SysDir string `toml:"SysDir"` ChunkLimit uint32 
`toml:"ChunkLimit"` diff --git a/llm.go b/llm.go index a648364..bca9655 100644 --- a/llm.go +++ b/llm.go @@ -614,12 +614,14 @@ func (or OpenRouterChat) ParseChunk(data []byte) (*models.TextChunk, error) { logger.Error("failed to decode", "error", err, "line", string(data)) return nil, err } + lastChoice := llmchunk.Choices[len(llmchunk.Choices)-1] resp := &models.TextChunk{ - Chunk: llmchunk.Choices[len(llmchunk.Choices)-1].Delta.Content, + Chunk: lastChoice.Delta.Content, + Reasoning: lastChoice.Delta.Reasoning, } // Handle tool calls similar to LCPChat - if len(llmchunk.Choices[len(llmchunk.Choices)-1].Delta.ToolCalls) > 0 { - toolCall := llmchunk.Choices[len(llmchunk.Choices)-1].Delta.ToolCalls[0] + if len(lastChoice.Delta.ToolCalls) > 0 { + toolCall := lastChoice.Delta.ToolCalls[0] resp.ToolChunk = toolCall.Function.Arguments fname := toolCall.Function.Name if fname != "" { @@ -631,7 +633,7 @@ func (or OpenRouterChat) ParseChunk(data []byte) (*models.TextChunk, error) { if resp.ToolChunk != "" { resp.ToolResp = true } - if llmchunk.Choices[len(llmchunk.Choices)-1].FinishReason == "stop" { + if lastChoice.FinishReason == "stop" { if resp.Chunk != "" { logger.Error("text inside of finish llmchunk", "chunk", llmchunk) } @@ -710,7 +712,7 @@ func (or OpenRouterChat) FormMsg(msg, role string, resume bool) (io.Reader, erro } // Clean null/empty messages to prevent API issues bodyCopy.Messages = consolidateAssistantMessages(bodyCopy.Messages) - orBody := models.NewOpenRouterChatReq(*bodyCopy, defaultLCPProps) + orBody := models.NewOpenRouterChatReq(*bodyCopy, defaultLCPProps, cfg.ReasoningEffort) if cfg.ToolUse && !resume && role != cfg.ToolRole { orBody.Tools = baseTools // set tools to use } diff --git a/models/models.go b/models/models.go index 8f42795..b089ecd 100644 --- a/models/models.go +++ b/models/models.go @@ -86,6 +86,7 @@ type TextChunk struct { ToolResp bool FuncName string ToolID string + Reasoning string // For models that send reasoning separately 
(OpenRouter, etc.) } type TextContentPart struct { diff --git a/models/openrouter.go b/models/openrouter.go index 6196498..62709a1 100644 --- a/models/openrouter.go +++ b/models/openrouter.go @@ -25,17 +25,23 @@ func NewOpenRouterCompletionReq(model, prompt string, props map[string]float32, } type OpenRouterChatReq struct { - Messages []RoleMsg `json:"messages"` - Model string `json:"model"` - Stream bool `json:"stream"` - Temperature float32 `json:"temperature"` - MinP float32 `json:"min_p"` - NPredict int32 `json:"max_tokens"` - Tools []Tool `json:"tools"` + Messages []RoleMsg `json:"messages"` + Model string `json:"model"` + Stream bool `json:"stream"` + Temperature float32 `json:"temperature"` + MinP float32 `json:"min_p"` + NPredict int32 `json:"max_tokens"` + Tools []Tool `json:"tools"` + Reasoning *ReasoningConfig `json:"reasoning,omitempty"` } -func NewOpenRouterChatReq(cb ChatBody, props map[string]float32) OpenRouterChatReq { - return OpenRouterChatReq{ +type ReasoningConfig struct { + Effort string `json:"effort,omitempty"` // xhigh, high, medium, low, minimal, none + Summary string `json:"summary,omitempty"` // auto, concise, detailed +} + +func NewOpenRouterChatReq(cb ChatBody, props map[string]float32, reasoningEffort string) OpenRouterChatReq { + req := OpenRouterChatReq{ Messages: cb.Messages, Model: cb.Model, Stream: cb.Stream, @@ -43,6 +49,13 @@ func NewOpenRouterChatReq(cb ChatBody, props map[string]float32) OpenRouterChatR MinP: props["min_p"], NPredict: int32(props["n_predict"]), } + // Only include reasoning config if effort is specified and not "none" + if reasoningEffort != "" && reasoningEffort != "none" { + req.Reasoning = &ReasoningConfig{ + Effort: reasoningEffort, + } + } + return req } type OpenRouterChatRespNonStream struct { @@ -82,6 +95,7 @@ type OpenRouterChatResp struct { Delta struct { Role string `json:"role"` Content string `json:"content"` + Reasoning string `json:"reasoning"` ToolCalls []ToolDeltaResp `json:"tool_calls"` } 
`json:"delta"` FinishReason string `json:"finish_reason"`