Feat: openrouter reasoning

This commit is contained in:
Grail Finder
2026-02-21 16:26:13 +03:00
parent 85b11fa9ff
commit 96ffbd5cf5
7 changed files with 69 additions and 21 deletions

View File

@@ -15,10 +15,10 @@ import (
var httpClient = &http.Client{}
var defaultProps = map[string]float32{
"temperature": 0.8,
"dry_multiplier": 0.0,
"min_p": 0.05,
"n_predict": -1.0,
"temperature": 0.8,
"dry_multiplier": 0.0,
"min_p": 0.05,
"n_predict": -1.0,
}
func detectAPI(api string) (isCompletion, isChat, isDeepSeek, isOpenRouter bool) {
@@ -110,8 +110,8 @@ func (ag *AgentClient) buildRequest(sysprompt, msg string) ([]byte, error) {
req := models.NewDSChatReq(*chatBody)
return json.Marshal(req)
case isOpenRouter:
// OpenRouter chat
req := models.NewOpenRouterChatReq(*chatBody, defaultProps)
// OpenRouter chat - agents don't use reasoning by default
req := models.NewOpenRouterChatReq(*chatBody, defaultProps, "")
return json.Marshal(req)
default:
// Assume llama.cpp chat (OpenAI format)

28
bot.go
View File

@@ -573,6 +573,9 @@ func sendMsgToLLM(body io.Reader) {
defer resp.Body.Close()
reader := bufio.NewReader(resp.Body)
counter := uint32(0)
reasoningBuffer := strings.Builder{}
hasReasoning := false
reasoningSent := false
for {
var (
answerText string
@@ -645,6 +648,13 @@ func sendMsgToLLM(body io.Reader) {
// break
// }
if chunk.Finished {
// Send any remaining reasoning if not already sent
if hasReasoning && !reasoningSent {
reasoningText := "<think>" + reasoningBuffer.String() + "</think>"
answerText = strings.ReplaceAll(reasoningText, "\n\n", "\n")
chunkChan <- answerText
reasoningSent = true
}
if chunk.Chunk != "" {
logger.Warn("text inside of finish llmchunk", "chunk", chunk, "counter", counter)
answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
@@ -656,6 +666,20 @@ func sendMsgToLLM(body io.Reader) {
if counter == 0 {
chunk.Chunk = strings.TrimPrefix(chunk.Chunk, " ")
}
// Handle reasoning chunks - buffer them and prepend when content starts
if chunk.Reasoning != "" && !reasoningSent {
reasoningBuffer.WriteString(chunk.Reasoning)
hasReasoning = true
}
// When we get content and have buffered reasoning, send reasoning first
if chunk.Chunk != "" && hasReasoning && !reasoningSent {
reasoningText := "<think>" + reasoningBuffer.String() + "</think>"
answerText = strings.ReplaceAll(reasoningText, "\n\n", "\n")
chunkChan <- answerText
reasoningSent = true
}
// bot sends way too many \n
answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
// Accumulate text to check for stop strings that might span across chunks
@@ -666,7 +690,9 @@ func sendMsgToLLM(body io.Reader) {
logger.Debug("stop string detected on client side for completion endpoint", "stop_string", answerText)
streamDone <- true
}
chunkChan <- answerText
if answerText != "" {
chunkChan <- answerText
}
openAIToolChan <- chunk.ToolChunk
if chunk.FuncName != "" {
lastToolCall.Name = chunk.FuncName

View File

@@ -50,3 +50,7 @@ CharSpecificContextEnabled = true
CharSpecificContextTag = "@"
AutoTurn = true
StripThinkingFromAPI = true # Strip <think> blocks from messages before sending to LLM (keeps them in chat history)
# OpenRouter reasoning configuration (only applies to OpenRouter chat API)
# Valid values: xhigh, high, medium, low, minimal, none (empty or "none" = no reasoning settings are sent with the request)
# For models that support reasoning, the returned thinking content is wrapped in <think> tags by the client
ReasoningEffort = "medium"

View File

@@ -20,6 +20,7 @@ type Config struct {
ToolUse bool `toml:"ToolUse"`
ThinkUse bool `toml:"ThinkUse"`
StripThinkingFromAPI bool `toml:"StripThinkingFromAPI"`
ReasoningEffort string `toml:"ReasoningEffort"`
AssistantRole string `toml:"AssistantRole"`
SysDir string `toml:"SysDir"`
ChunkLimit uint32 `toml:"ChunkLimit"`

12
llm.go
View File

@@ -614,12 +614,14 @@ func (or OpenRouterChat) ParseChunk(data []byte) (*models.TextChunk, error) {
logger.Error("failed to decode", "error", err, "line", string(data))
return nil, err
}
lastChoice := llmchunk.Choices[len(llmchunk.Choices)-1]
resp := &models.TextChunk{
Chunk: llmchunk.Choices[len(llmchunk.Choices)-1].Delta.Content,
Chunk: lastChoice.Delta.Content,
Reasoning: lastChoice.Delta.Reasoning,
}
// Handle tool calls similar to LCPChat
if len(llmchunk.Choices[len(llmchunk.Choices)-1].Delta.ToolCalls) > 0 {
toolCall := llmchunk.Choices[len(llmchunk.Choices)-1].Delta.ToolCalls[0]
if len(lastChoice.Delta.ToolCalls) > 0 {
toolCall := lastChoice.Delta.ToolCalls[0]
resp.ToolChunk = toolCall.Function.Arguments
fname := toolCall.Function.Name
if fname != "" {
@@ -631,7 +633,7 @@ func (or OpenRouterChat) ParseChunk(data []byte) (*models.TextChunk, error) {
if resp.ToolChunk != "" {
resp.ToolResp = true
}
if llmchunk.Choices[len(llmchunk.Choices)-1].FinishReason == "stop" {
if lastChoice.FinishReason == "stop" {
if resp.Chunk != "" {
logger.Error("text inside of finish llmchunk", "chunk", llmchunk)
}
@@ -710,7 +712,7 @@ func (or OpenRouterChat) FormMsg(msg, role string, resume bool) (io.Reader, erro
}
// Clean null/empty messages to prevent API issues
bodyCopy.Messages = consolidateAssistantMessages(bodyCopy.Messages)
orBody := models.NewOpenRouterChatReq(*bodyCopy, defaultLCPProps)
orBody := models.NewOpenRouterChatReq(*bodyCopy, defaultLCPProps, cfg.ReasoningEffort)
if cfg.ToolUse && !resume && role != cfg.ToolRole {
orBody.Tools = baseTools // set tools to use
}

View File

@@ -86,6 +86,7 @@ type TextChunk struct {
ToolResp bool
FuncName string
ToolID string
Reasoning string // For models that send reasoning separately (OpenRouter, etc.)
}
type TextContentPart struct {

View File

@@ -25,17 +25,23 @@ func NewOpenRouterCompletionReq(model, prompt string, props map[string]float32,
}
type OpenRouterChatReq struct {
Messages []RoleMsg `json:"messages"`
Model string `json:"model"`
Stream bool `json:"stream"`
Temperature float32 `json:"temperature"`
MinP float32 `json:"min_p"`
NPredict int32 `json:"max_tokens"`
Tools []Tool `json:"tools"`
Messages []RoleMsg `json:"messages"`
Model string `json:"model"`
Stream bool `json:"stream"`
Temperature float32 `json:"temperature"`
MinP float32 `json:"min_p"`
NPredict int32 `json:"max_tokens"`
Tools []Tool `json:"tools"`
Reasoning *ReasoningConfig `json:"reasoning,omitempty"`
}
func NewOpenRouterChatReq(cb ChatBody, props map[string]float32) OpenRouterChatReq {
return OpenRouterChatReq{
// ReasoningConfig is the value of the optional "reasoning" field of
// OpenRouterChatReq. Each field is left out of the JSON when it is empty.
type ReasoningConfig struct {
Effort string `json:"effort,omitempty"` // xhigh, high, medium, low, minimal, none
Summary string `json:"summary,omitempty"` // auto, concise, detailed
}
func NewOpenRouterChatReq(cb ChatBody, props map[string]float32, reasoningEffort string) OpenRouterChatReq {
req := OpenRouterChatReq{
Messages: cb.Messages,
Model: cb.Model,
Stream: cb.Stream,
@@ -43,6 +49,13 @@ func NewOpenRouterChatReq(cb ChatBody, props map[string]float32) OpenRouterChatR
MinP: props["min_p"],
NPredict: int32(props["n_predict"]),
}
// Only include reasoning config if effort is specified and not "none"
if reasoningEffort != "" && reasoningEffort != "none" {
req.Reasoning = &ReasoningConfig{
Effort: reasoningEffort,
}
}
return req
}
type OpenRouterChatRespNonStream struct {
@@ -82,6 +95,7 @@ type OpenRouterChatResp struct {
Delta struct {
Role string `json:"role"`
Content string `json:"content"`
Reasoning string `json:"reasoning"`
ToolCalls []ToolDeltaResp `json:"tool_calls"`
} `json:"delta"`
FinishReason string `json:"finish_reason"`