Feat: openrouter reasoning
This commit is contained in:
@@ -15,10 +15,10 @@ import (
|
|||||||
var httpClient = &http.Client{}
|
var httpClient = &http.Client{}
|
||||||
|
|
||||||
var defaultProps = map[string]float32{
|
var defaultProps = map[string]float32{
|
||||||
"temperature": 0.8,
|
"temperature": 0.8,
|
||||||
"dry_multiplier": 0.0,
|
"dry_multiplier": 0.0,
|
||||||
"min_p": 0.05,
|
"min_p": 0.05,
|
||||||
"n_predict": -1.0,
|
"n_predict": -1.0,
|
||||||
}
|
}
|
||||||
|
|
||||||
func detectAPI(api string) (isCompletion, isChat, isDeepSeek, isOpenRouter bool) {
|
func detectAPI(api string) (isCompletion, isChat, isDeepSeek, isOpenRouter bool) {
|
||||||
@@ -110,8 +110,8 @@ func (ag *AgentClient) buildRequest(sysprompt, msg string) ([]byte, error) {
|
|||||||
req := models.NewDSChatReq(*chatBody)
|
req := models.NewDSChatReq(*chatBody)
|
||||||
return json.Marshal(req)
|
return json.Marshal(req)
|
||||||
case isOpenRouter:
|
case isOpenRouter:
|
||||||
// OpenRouter chat
|
// OpenRouter chat - agents don't use reasoning by default
|
||||||
req := models.NewOpenRouterChatReq(*chatBody, defaultProps)
|
req := models.NewOpenRouterChatReq(*chatBody, defaultProps, "")
|
||||||
return json.Marshal(req)
|
return json.Marshal(req)
|
||||||
default:
|
default:
|
||||||
// Assume llama.cpp chat (OpenAI format)
|
// Assume llama.cpp chat (OpenAI format)
|
||||||
|
|||||||
28
bot.go
28
bot.go
@@ -573,6 +573,9 @@ func sendMsgToLLM(body io.Reader) {
|
|||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
reader := bufio.NewReader(resp.Body)
|
reader := bufio.NewReader(resp.Body)
|
||||||
counter := uint32(0)
|
counter := uint32(0)
|
||||||
|
reasoningBuffer := strings.Builder{}
|
||||||
|
hasReasoning := false
|
||||||
|
reasoningSent := false
|
||||||
for {
|
for {
|
||||||
var (
|
var (
|
||||||
answerText string
|
answerText string
|
||||||
@@ -645,6 +648,13 @@ func sendMsgToLLM(body io.Reader) {
|
|||||||
// break
|
// break
|
||||||
// }
|
// }
|
||||||
if chunk.Finished {
|
if chunk.Finished {
|
||||||
|
// Send any remaining reasoning if not already sent
|
||||||
|
if hasReasoning && !reasoningSent {
|
||||||
|
reasoningText := "<think>" + reasoningBuffer.String() + "</think>"
|
||||||
|
answerText = strings.ReplaceAll(reasoningText, "\n\n", "\n")
|
||||||
|
chunkChan <- answerText
|
||||||
|
reasoningSent = true
|
||||||
|
}
|
||||||
if chunk.Chunk != "" {
|
if chunk.Chunk != "" {
|
||||||
logger.Warn("text inside of finish llmchunk", "chunk", chunk, "counter", counter)
|
logger.Warn("text inside of finish llmchunk", "chunk", chunk, "counter", counter)
|
||||||
answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
|
answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
|
||||||
@@ -656,6 +666,20 @@ func sendMsgToLLM(body io.Reader) {
|
|||||||
if counter == 0 {
|
if counter == 0 {
|
||||||
chunk.Chunk = strings.TrimPrefix(chunk.Chunk, " ")
|
chunk.Chunk = strings.TrimPrefix(chunk.Chunk, " ")
|
||||||
}
|
}
|
||||||
|
// Handle reasoning chunks - buffer them and prepend when content starts
|
||||||
|
if chunk.Reasoning != "" && !reasoningSent {
|
||||||
|
reasoningBuffer.WriteString(chunk.Reasoning)
|
||||||
|
hasReasoning = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// When we get content and have buffered reasoning, send reasoning first
|
||||||
|
if chunk.Chunk != "" && hasReasoning && !reasoningSent {
|
||||||
|
reasoningText := "<think>" + reasoningBuffer.String() + "</think>"
|
||||||
|
answerText = strings.ReplaceAll(reasoningText, "\n\n", "\n")
|
||||||
|
chunkChan <- answerText
|
||||||
|
reasoningSent = true
|
||||||
|
}
|
||||||
|
|
||||||
// bot sends way too many \n
|
// bot sends way too many \n
|
||||||
answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
|
answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
|
||||||
// Accumulate text to check for stop strings that might span across chunks
|
// Accumulate text to check for stop strings that might span across chunks
|
||||||
@@ -666,7 +690,9 @@ func sendMsgToLLM(body io.Reader) {
|
|||||||
logger.Debug("stop string detected on client side for completion endpoint", "stop_string", answerText)
|
logger.Debug("stop string detected on client side for completion endpoint", "stop_string", answerText)
|
||||||
streamDone <- true
|
streamDone <- true
|
||||||
}
|
}
|
||||||
chunkChan <- answerText
|
if answerText != "" {
|
||||||
|
chunkChan <- answerText
|
||||||
|
}
|
||||||
openAIToolChan <- chunk.ToolChunk
|
openAIToolChan <- chunk.ToolChunk
|
||||||
if chunk.FuncName != "" {
|
if chunk.FuncName != "" {
|
||||||
lastToolCall.Name = chunk.FuncName
|
lastToolCall.Name = chunk.FuncName
|
||||||
|
|||||||
@@ -50,3 +50,7 @@ CharSpecificContextEnabled = true
|
|||||||
CharSpecificContextTag = "@"
|
CharSpecificContextTag = "@"
|
||||||
AutoTurn = true
|
AutoTurn = true
|
||||||
StripThinkingFromAPI = true # Strip <think> blocks from messages before sending to LLM (keeps them in chat history)
|
StripThinkingFromAPI = true # Strip <think> blocks from messages before sending to LLM (keeps them in chat history)
|
||||||
|
# OpenRouter reasoning configuration (only applies to OpenRouter chat API)
|
||||||
|
# Valid values: xhigh, high, medium, low, minimal, none (an empty string or "none" disables reasoning)
|
||||||
|
# When a model returns reasoning, that text is shown in the chat wrapped in <think> tags ahead of the answer
|
||||||
|
ReasoningEffort = "medium"
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ type Config struct {
|
|||||||
ToolUse bool `toml:"ToolUse"`
|
ToolUse bool `toml:"ToolUse"`
|
||||||
ThinkUse bool `toml:"ThinkUse"`
|
ThinkUse bool `toml:"ThinkUse"`
|
||||||
StripThinkingFromAPI bool `toml:"StripThinkingFromAPI"`
|
StripThinkingFromAPI bool `toml:"StripThinkingFromAPI"`
|
||||||
|
ReasoningEffort string `toml:"ReasoningEffort"`
|
||||||
AssistantRole string `toml:"AssistantRole"`
|
AssistantRole string `toml:"AssistantRole"`
|
||||||
SysDir string `toml:"SysDir"`
|
SysDir string `toml:"SysDir"`
|
||||||
ChunkLimit uint32 `toml:"ChunkLimit"`
|
ChunkLimit uint32 `toml:"ChunkLimit"`
|
||||||
|
|||||||
12
llm.go
12
llm.go
@@ -614,12 +614,14 @@ func (or OpenRouterChat) ParseChunk(data []byte) (*models.TextChunk, error) {
|
|||||||
logger.Error("failed to decode", "error", err, "line", string(data))
|
logger.Error("failed to decode", "error", err, "line", string(data))
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
lastChoice := llmchunk.Choices[len(llmchunk.Choices)-1]
|
||||||
resp := &models.TextChunk{
|
resp := &models.TextChunk{
|
||||||
Chunk: llmchunk.Choices[len(llmchunk.Choices)-1].Delta.Content,
|
Chunk: lastChoice.Delta.Content,
|
||||||
|
Reasoning: lastChoice.Delta.Reasoning,
|
||||||
}
|
}
|
||||||
// Handle tool calls similar to LCPChat
|
// Handle tool calls similar to LCPChat
|
||||||
if len(llmchunk.Choices[len(llmchunk.Choices)-1].Delta.ToolCalls) > 0 {
|
if len(lastChoice.Delta.ToolCalls) > 0 {
|
||||||
toolCall := llmchunk.Choices[len(llmchunk.Choices)-1].Delta.ToolCalls[0]
|
toolCall := lastChoice.Delta.ToolCalls[0]
|
||||||
resp.ToolChunk = toolCall.Function.Arguments
|
resp.ToolChunk = toolCall.Function.Arguments
|
||||||
fname := toolCall.Function.Name
|
fname := toolCall.Function.Name
|
||||||
if fname != "" {
|
if fname != "" {
|
||||||
@@ -631,7 +633,7 @@ func (or OpenRouterChat) ParseChunk(data []byte) (*models.TextChunk, error) {
|
|||||||
if resp.ToolChunk != "" {
|
if resp.ToolChunk != "" {
|
||||||
resp.ToolResp = true
|
resp.ToolResp = true
|
||||||
}
|
}
|
||||||
if llmchunk.Choices[len(llmchunk.Choices)-1].FinishReason == "stop" {
|
if lastChoice.FinishReason == "stop" {
|
||||||
if resp.Chunk != "" {
|
if resp.Chunk != "" {
|
||||||
logger.Error("text inside of finish llmchunk", "chunk", llmchunk)
|
logger.Error("text inside of finish llmchunk", "chunk", llmchunk)
|
||||||
}
|
}
|
||||||
@@ -710,7 +712,7 @@ func (or OpenRouterChat) FormMsg(msg, role string, resume bool) (io.Reader, erro
|
|||||||
}
|
}
|
||||||
// Clean null/empty messages to prevent API issues
|
// Clean null/empty messages to prevent API issues
|
||||||
bodyCopy.Messages = consolidateAssistantMessages(bodyCopy.Messages)
|
bodyCopy.Messages = consolidateAssistantMessages(bodyCopy.Messages)
|
||||||
orBody := models.NewOpenRouterChatReq(*bodyCopy, defaultLCPProps)
|
orBody := models.NewOpenRouterChatReq(*bodyCopy, defaultLCPProps, cfg.ReasoningEffort)
|
||||||
if cfg.ToolUse && !resume && role != cfg.ToolRole {
|
if cfg.ToolUse && !resume && role != cfg.ToolRole {
|
||||||
orBody.Tools = baseTools // set tools to use
|
orBody.Tools = baseTools // set tools to use
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -86,6 +86,7 @@ type TextChunk struct {
|
|||||||
ToolResp bool
|
ToolResp bool
|
||||||
FuncName string
|
FuncName string
|
||||||
ToolID string
|
ToolID string
|
||||||
|
Reasoning string // For models that send reasoning separately (OpenRouter, etc.)
|
||||||
}
|
}
|
||||||
|
|
||||||
type TextContentPart struct {
|
type TextContentPart struct {
|
||||||
|
|||||||
@@ -25,17 +25,23 @@ func NewOpenRouterCompletionReq(model, prompt string, props map[string]float32,
|
|||||||
}
|
}
|
||||||
|
|
||||||
type OpenRouterChatReq struct {
|
type OpenRouterChatReq struct {
|
||||||
Messages []RoleMsg `json:"messages"`
|
Messages []RoleMsg `json:"messages"`
|
||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
Stream bool `json:"stream"`
|
Stream bool `json:"stream"`
|
||||||
Temperature float32 `json:"temperature"`
|
Temperature float32 `json:"temperature"`
|
||||||
MinP float32 `json:"min_p"`
|
MinP float32 `json:"min_p"`
|
||||||
NPredict int32 `json:"max_tokens"`
|
NPredict int32 `json:"max_tokens"`
|
||||||
Tools []Tool `json:"tools"`
|
Tools []Tool `json:"tools"`
|
||||||
|
Reasoning *ReasoningConfig `json:"reasoning,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewOpenRouterChatReq(cb ChatBody, props map[string]float32) OpenRouterChatReq {
|
type ReasoningConfig struct {
|
||||||
return OpenRouterChatReq{
|
Effort string `json:"effort,omitempty"` // xhigh, high, medium, low, minimal, none
|
||||||
|
Summary string `json:"summary,omitempty"` // auto, concise, detailed
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewOpenRouterChatReq(cb ChatBody, props map[string]float32, reasoningEffort string) OpenRouterChatReq {
|
||||||
|
req := OpenRouterChatReq{
|
||||||
Messages: cb.Messages,
|
Messages: cb.Messages,
|
||||||
Model: cb.Model,
|
Model: cb.Model,
|
||||||
Stream: cb.Stream,
|
Stream: cb.Stream,
|
||||||
@@ -43,6 +49,13 @@ func NewOpenRouterChatReq(cb ChatBody, props map[string]float32) OpenRouterChatR
|
|||||||
MinP: props["min_p"],
|
MinP: props["min_p"],
|
||||||
NPredict: int32(props["n_predict"]),
|
NPredict: int32(props["n_predict"]),
|
||||||
}
|
}
|
||||||
|
// Only include reasoning config if effort is specified and not "none"
|
||||||
|
if reasoningEffort != "" && reasoningEffort != "none" {
|
||||||
|
req.Reasoning = &ReasoningConfig{
|
||||||
|
Effort: reasoningEffort,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return req
|
||||||
}
|
}
|
||||||
|
|
||||||
type OpenRouterChatRespNonStream struct {
|
type OpenRouterChatRespNonStream struct {
|
||||||
@@ -82,6 +95,7 @@ type OpenRouterChatResp struct {
|
|||||||
Delta struct {
|
Delta struct {
|
||||||
Role string `json:"role"`
|
Role string `json:"role"`
|
||||||
Content string `json:"content"`
|
Content string `json:"content"`
|
||||||
|
Reasoning string `json:"reasoning"`
|
||||||
ToolCalls []ToolDeltaResp `json:"tool_calls"`
|
ToolCalls []ToolDeltaResp `json:"tool_calls"`
|
||||||
} `json:"delta"`
|
} `json:"delta"`
|
||||||
FinishReason string `json:"finish_reason"`
|
FinishReason string `json:"finish_reason"`
|
||||||
|
|||||||
Reference in New Issue
Block a user