Feat: OpenRouter reasoning

This commit is contained in:
Grail Finder
2026-02-21 16:26:13 +03:00
parent 85b11fa9ff
commit 96ffbd5cf5
7 changed files with 69 additions and 21 deletions

View File

@@ -15,10 +15,10 @@ import (
var httpClient = &http.Client{} var httpClient = &http.Client{}
var defaultProps = map[string]float32{ var defaultProps = map[string]float32{
"temperature": 0.8, "temperature": 0.8,
"dry_multiplier": 0.0, "dry_multiplier": 0.0,
"min_p": 0.05, "min_p": 0.05,
"n_predict": -1.0, "n_predict": -1.0,
} }
func detectAPI(api string) (isCompletion, isChat, isDeepSeek, isOpenRouter bool) { func detectAPI(api string) (isCompletion, isChat, isDeepSeek, isOpenRouter bool) {
@@ -110,8 +110,8 @@ func (ag *AgentClient) buildRequest(sysprompt, msg string) ([]byte, error) {
req := models.NewDSChatReq(*chatBody) req := models.NewDSChatReq(*chatBody)
return json.Marshal(req) return json.Marshal(req)
case isOpenRouter: case isOpenRouter:
// OpenRouter chat // OpenRouter chat - agents don't use reasoning by default
req := models.NewOpenRouterChatReq(*chatBody, defaultProps) req := models.NewOpenRouterChatReq(*chatBody, defaultProps, "")
return json.Marshal(req) return json.Marshal(req)
default: default:
// Assume llama.cpp chat (OpenAI format) // Assume llama.cpp chat (OpenAI format)

28
bot.go
View File

@@ -573,6 +573,9 @@ func sendMsgToLLM(body io.Reader) {
defer resp.Body.Close() defer resp.Body.Close()
reader := bufio.NewReader(resp.Body) reader := bufio.NewReader(resp.Body)
counter := uint32(0) counter := uint32(0)
reasoningBuffer := strings.Builder{}
hasReasoning := false
reasoningSent := false
for { for {
var ( var (
answerText string answerText string
@@ -645,6 +648,13 @@ func sendMsgToLLM(body io.Reader) {
// break // break
// } // }
if chunk.Finished { if chunk.Finished {
// Send any remaining reasoning if not already sent
if hasReasoning && !reasoningSent {
reasoningText := "<think>" + reasoningBuffer.String() + "</think>"
answerText = strings.ReplaceAll(reasoningText, "\n\n", "\n")
chunkChan <- answerText
reasoningSent = true
}
if chunk.Chunk != "" { if chunk.Chunk != "" {
logger.Warn("text inside of finish llmchunk", "chunk", chunk, "counter", counter) logger.Warn("text inside of finish llmchunk", "chunk", chunk, "counter", counter)
answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n") answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
@@ -656,6 +666,20 @@ func sendMsgToLLM(body io.Reader) {
if counter == 0 { if counter == 0 {
chunk.Chunk = strings.TrimPrefix(chunk.Chunk, " ") chunk.Chunk = strings.TrimPrefix(chunk.Chunk, " ")
} }
// Handle reasoning chunks - buffer them and prepend when content starts
if chunk.Reasoning != "" && !reasoningSent {
reasoningBuffer.WriteString(chunk.Reasoning)
hasReasoning = true
}
// When we get content and have buffered reasoning, send reasoning first
if chunk.Chunk != "" && hasReasoning && !reasoningSent {
reasoningText := "<think>" + reasoningBuffer.String() + "</think>"
answerText = strings.ReplaceAll(reasoningText, "\n\n", "\n")
chunkChan <- answerText
reasoningSent = true
}
// bot sends way too many \n // bot sends way too many \n
answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n") answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
// Accumulate text to check for stop strings that might span across chunks // Accumulate text to check for stop strings that might span across chunks
@@ -666,7 +690,9 @@ func sendMsgToLLM(body io.Reader) {
logger.Debug("stop string detected on client side for completion endpoint", "stop_string", answerText) logger.Debug("stop string detected on client side for completion endpoint", "stop_string", answerText)
streamDone <- true streamDone <- true
} }
chunkChan <- answerText if answerText != "" {
chunkChan <- answerText
}
openAIToolChan <- chunk.ToolChunk openAIToolChan <- chunk.ToolChunk
if chunk.FuncName != "" { if chunk.FuncName != "" {
lastToolCall.Name = chunk.FuncName lastToolCall.Name = chunk.FuncName

View File

@@ -50,3 +50,7 @@ CharSpecificContextEnabled = true
CharSpecificContextTag = "@" CharSpecificContextTag = "@"
AutoTurn = true AutoTurn = true
StripThinkingFromAPI = true # Strip <think> blocks from messages before sending to LLM (keeps them in chat history) StripThinkingFromAPI = true # Strip <think> blocks from messages before sending to LLM (keeps them in chat history)
# OpenRouter reasoning configuration (only applies to OpenRouter chat API)
# Valid values: xhigh, high, medium, low, minimal, none (an empty string or "none" disables reasoning)
# Models that support reasoning will include thinking content wrapped in <think> tags
ReasoningEffort = "medium"

View File

@@ -20,6 +20,7 @@ type Config struct {
ToolUse bool `toml:"ToolUse"` ToolUse bool `toml:"ToolUse"`
ThinkUse bool `toml:"ThinkUse"` ThinkUse bool `toml:"ThinkUse"`
StripThinkingFromAPI bool `toml:"StripThinkingFromAPI"` StripThinkingFromAPI bool `toml:"StripThinkingFromAPI"`
ReasoningEffort string `toml:"ReasoningEffort"`
AssistantRole string `toml:"AssistantRole"` AssistantRole string `toml:"AssistantRole"`
SysDir string `toml:"SysDir"` SysDir string `toml:"SysDir"`
ChunkLimit uint32 `toml:"ChunkLimit"` ChunkLimit uint32 `toml:"ChunkLimit"`

12
llm.go
View File

@@ -614,12 +614,14 @@ func (or OpenRouterChat) ParseChunk(data []byte) (*models.TextChunk, error) {
logger.Error("failed to decode", "error", err, "line", string(data)) logger.Error("failed to decode", "error", err, "line", string(data))
return nil, err return nil, err
} }
lastChoice := llmchunk.Choices[len(llmchunk.Choices)-1]
resp := &models.TextChunk{ resp := &models.TextChunk{
Chunk: llmchunk.Choices[len(llmchunk.Choices)-1].Delta.Content, Chunk: lastChoice.Delta.Content,
Reasoning: lastChoice.Delta.Reasoning,
} }
// Handle tool calls similar to LCPChat // Handle tool calls similar to LCPChat
if len(llmchunk.Choices[len(llmchunk.Choices)-1].Delta.ToolCalls) > 0 { if len(lastChoice.Delta.ToolCalls) > 0 {
toolCall := llmchunk.Choices[len(llmchunk.Choices)-1].Delta.ToolCalls[0] toolCall := lastChoice.Delta.ToolCalls[0]
resp.ToolChunk = toolCall.Function.Arguments resp.ToolChunk = toolCall.Function.Arguments
fname := toolCall.Function.Name fname := toolCall.Function.Name
if fname != "" { if fname != "" {
@@ -631,7 +633,7 @@ func (or OpenRouterChat) ParseChunk(data []byte) (*models.TextChunk, error) {
if resp.ToolChunk != "" { if resp.ToolChunk != "" {
resp.ToolResp = true resp.ToolResp = true
} }
if llmchunk.Choices[len(llmchunk.Choices)-1].FinishReason == "stop" { if lastChoice.FinishReason == "stop" {
if resp.Chunk != "" { if resp.Chunk != "" {
logger.Error("text inside of finish llmchunk", "chunk", llmchunk) logger.Error("text inside of finish llmchunk", "chunk", llmchunk)
} }
@@ -710,7 +712,7 @@ func (or OpenRouterChat) FormMsg(msg, role string, resume bool) (io.Reader, erro
} }
// Clean null/empty messages to prevent API issues // Clean null/empty messages to prevent API issues
bodyCopy.Messages = consolidateAssistantMessages(bodyCopy.Messages) bodyCopy.Messages = consolidateAssistantMessages(bodyCopy.Messages)
orBody := models.NewOpenRouterChatReq(*bodyCopy, defaultLCPProps) orBody := models.NewOpenRouterChatReq(*bodyCopy, defaultLCPProps, cfg.ReasoningEffort)
if cfg.ToolUse && !resume && role != cfg.ToolRole { if cfg.ToolUse && !resume && role != cfg.ToolRole {
orBody.Tools = baseTools // set tools to use orBody.Tools = baseTools // set tools to use
} }

View File

@@ -86,6 +86,7 @@ type TextChunk struct {
ToolResp bool ToolResp bool
FuncName string FuncName string
ToolID string ToolID string
Reasoning string // For models that send reasoning separately (OpenRouter, etc.)
} }
type TextContentPart struct { type TextContentPart struct {

View File

@@ -25,17 +25,23 @@ func NewOpenRouterCompletionReq(model, prompt string, props map[string]float32,
} }
type OpenRouterChatReq struct { type OpenRouterChatReq struct {
Messages []RoleMsg `json:"messages"` Messages []RoleMsg `json:"messages"`
Model string `json:"model"` Model string `json:"model"`
Stream bool `json:"stream"` Stream bool `json:"stream"`
Temperature float32 `json:"temperature"` Temperature float32 `json:"temperature"`
MinP float32 `json:"min_p"` MinP float32 `json:"min_p"`
NPredict int32 `json:"max_tokens"` NPredict int32 `json:"max_tokens"`
Tools []Tool `json:"tools"` Tools []Tool `json:"tools"`
Reasoning *ReasoningConfig `json:"reasoning,omitempty"`
} }
func NewOpenRouterChatReq(cb ChatBody, props map[string]float32) OpenRouterChatReq { type ReasoningConfig struct {
return OpenRouterChatReq{ Effort string `json:"effort,omitempty"` // xhigh, high, medium, low, minimal, none
Summary string `json:"summary,omitempty"` // auto, concise, detailed
}
func NewOpenRouterChatReq(cb ChatBody, props map[string]float32, reasoningEffort string) OpenRouterChatReq {
req := OpenRouterChatReq{
Messages: cb.Messages, Messages: cb.Messages,
Model: cb.Model, Model: cb.Model,
Stream: cb.Stream, Stream: cb.Stream,
@@ -43,6 +49,13 @@ func NewOpenRouterChatReq(cb ChatBody, props map[string]float32) OpenRouterChatR
MinP: props["min_p"], MinP: props["min_p"],
NPredict: int32(props["n_predict"]), NPredict: int32(props["n_predict"]),
} }
// Only include reasoning config if effort is specified and not "none"
if reasoningEffort != "" && reasoningEffort != "none" {
req.Reasoning = &ReasoningConfig{
Effort: reasoningEffort,
}
}
return req
} }
type OpenRouterChatRespNonStream struct { type OpenRouterChatRespNonStream struct {
@@ -82,6 +95,7 @@ type OpenRouterChatResp struct {
Delta struct { Delta struct {
Role string `json:"role"` Role string `json:"role"`
Content string `json:"content"` Content string `json:"content"`
Reasoning string `json:"reasoning"`
ToolCalls []ToolDeltaResp `json:"tool_calls"` ToolCalls []ToolDeltaResp `json:"tool_calls"`
} `json:"delta"` } `json:"delta"`
FinishReason string `json:"finish_reason"` FinishReason string `json:"finish_reason"`