Feat: openrouter reasoning
This commit is contained in:
@@ -110,8 +110,8 @@ func (ag *AgentClient) buildRequest(sysprompt, msg string) ([]byte, error) {
|
||||
req := models.NewDSChatReq(*chatBody)
|
||||
return json.Marshal(req)
|
||||
case isOpenRouter:
|
||||
// OpenRouter chat
|
||||
req := models.NewOpenRouterChatReq(*chatBody, defaultProps)
|
||||
// OpenRouter chat - agents don't use reasoning by default
|
||||
req := models.NewOpenRouterChatReq(*chatBody, defaultProps, "")
|
||||
return json.Marshal(req)
|
||||
default:
|
||||
// Assume llama.cpp chat (OpenAI format)
|
||||
|
||||
26
bot.go
26
bot.go
@@ -573,6 +573,9 @@ func sendMsgToLLM(body io.Reader) {
|
||||
defer resp.Body.Close()
|
||||
reader := bufio.NewReader(resp.Body)
|
||||
counter := uint32(0)
|
||||
reasoningBuffer := strings.Builder{}
|
||||
hasReasoning := false
|
||||
reasoningSent := false
|
||||
for {
|
||||
var (
|
||||
answerText string
|
||||
@@ -645,6 +648,13 @@ func sendMsgToLLM(body io.Reader) {
|
||||
// break
|
||||
// }
|
||||
if chunk.Finished {
|
||||
// Send any remaining reasoning if not already sent
|
||||
if hasReasoning && !reasoningSent {
|
||||
reasoningText := "<think>" + reasoningBuffer.String() + "</think>"
|
||||
answerText = strings.ReplaceAll(reasoningText, "\n\n", "\n")
|
||||
chunkChan <- answerText
|
||||
reasoningSent = true
|
||||
}
|
||||
if chunk.Chunk != "" {
|
||||
logger.Warn("text inside of finish llmchunk", "chunk", chunk, "counter", counter)
|
||||
answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
|
||||
@@ -656,6 +666,20 @@ func sendMsgToLLM(body io.Reader) {
|
||||
if counter == 0 {
|
||||
chunk.Chunk = strings.TrimPrefix(chunk.Chunk, " ")
|
||||
}
|
||||
// Handle reasoning chunks - buffer them and prepend when content starts
|
||||
if chunk.Reasoning != "" && !reasoningSent {
|
||||
reasoningBuffer.WriteString(chunk.Reasoning)
|
||||
hasReasoning = true
|
||||
}
|
||||
|
||||
// When we get content and have buffered reasoning, send reasoning first
|
||||
if chunk.Chunk != "" && hasReasoning && !reasoningSent {
|
||||
reasoningText := "<think>" + reasoningBuffer.String() + "</think>"
|
||||
answerText = strings.ReplaceAll(reasoningText, "\n\n", "\n")
|
||||
chunkChan <- answerText
|
||||
reasoningSent = true
|
||||
}
|
||||
|
||||
// bot sends way too many \n
|
||||
answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
|
||||
// Accumulate text to check for stop strings that might span across chunks
|
||||
@@ -666,7 +690,9 @@ func sendMsgToLLM(body io.Reader) {
|
||||
logger.Debug("stop string detected on client side for completion endpoint", "stop_string", answerText)
|
||||
streamDone <- true
|
||||
}
|
||||
if answerText != "" {
|
||||
chunkChan <- answerText
|
||||
}
|
||||
openAIToolChan <- chunk.ToolChunk
|
||||
if chunk.FuncName != "" {
|
||||
lastToolCall.Name = chunk.FuncName
|
||||
|
||||
@@ -50,3 +50,7 @@ CharSpecificContextEnabled = true
|
||||
CharSpecificContextTag = "@"
|
||||
AutoTurn = true
|
||||
StripThinkingFromAPI = true # Strip <think> blocks from messages before sending to LLM (keeps them in chat history)
|
||||
# OpenRouter reasoning configuration (only applies to OpenRouter chat API)
|
||||
# Valid values: xhigh, high, medium, low, minimal, none (an empty string or "none" disables reasoning)
|
||||
# Models that support reasoning will include thinking content wrapped in <think> tags
|
||||
ReasoningEffort = "medium"
|
||||
|
||||
@@ -20,6 +20,7 @@ type Config struct {
|
||||
ToolUse bool `toml:"ToolUse"`
|
||||
ThinkUse bool `toml:"ThinkUse"`
|
||||
StripThinkingFromAPI bool `toml:"StripThinkingFromAPI"`
|
||||
ReasoningEffort string `toml:"ReasoningEffort"`
|
||||
AssistantRole string `toml:"AssistantRole"`
|
||||
SysDir string `toml:"SysDir"`
|
||||
ChunkLimit uint32 `toml:"ChunkLimit"`
|
||||
|
||||
12
llm.go
12
llm.go
@@ -614,12 +614,14 @@ func (or OpenRouterChat) ParseChunk(data []byte) (*models.TextChunk, error) {
|
||||
logger.Error("failed to decode", "error", err, "line", string(data))
|
||||
return nil, err
|
||||
}
|
||||
lastChoice := llmchunk.Choices[len(llmchunk.Choices)-1]
|
||||
resp := &models.TextChunk{
|
||||
Chunk: llmchunk.Choices[len(llmchunk.Choices)-1].Delta.Content,
|
||||
Chunk: lastChoice.Delta.Content,
|
||||
Reasoning: lastChoice.Delta.Reasoning,
|
||||
}
|
||||
// Handle tool calls similar to LCPChat
|
||||
if len(llmchunk.Choices[len(llmchunk.Choices)-1].Delta.ToolCalls) > 0 {
|
||||
toolCall := llmchunk.Choices[len(llmchunk.Choices)-1].Delta.ToolCalls[0]
|
||||
if len(lastChoice.Delta.ToolCalls) > 0 {
|
||||
toolCall := lastChoice.Delta.ToolCalls[0]
|
||||
resp.ToolChunk = toolCall.Function.Arguments
|
||||
fname := toolCall.Function.Name
|
||||
if fname != "" {
|
||||
@@ -631,7 +633,7 @@ func (or OpenRouterChat) ParseChunk(data []byte) (*models.TextChunk, error) {
|
||||
if resp.ToolChunk != "" {
|
||||
resp.ToolResp = true
|
||||
}
|
||||
if llmchunk.Choices[len(llmchunk.Choices)-1].FinishReason == "stop" {
|
||||
if lastChoice.FinishReason == "stop" {
|
||||
if resp.Chunk != "" {
|
||||
logger.Error("text inside of finish llmchunk", "chunk", llmchunk)
|
||||
}
|
||||
@@ -710,7 +712,7 @@ func (or OpenRouterChat) FormMsg(msg, role string, resume bool) (io.Reader, erro
|
||||
}
|
||||
// Clean null/empty messages to prevent API issues
|
||||
bodyCopy.Messages = consolidateAssistantMessages(bodyCopy.Messages)
|
||||
orBody := models.NewOpenRouterChatReq(*bodyCopy, defaultLCPProps)
|
||||
orBody := models.NewOpenRouterChatReq(*bodyCopy, defaultLCPProps, cfg.ReasoningEffort)
|
||||
if cfg.ToolUse && !resume && role != cfg.ToolRole {
|
||||
orBody.Tools = baseTools // set tools to use
|
||||
}
|
||||
|
||||
@@ -86,6 +86,7 @@ type TextChunk struct {
|
||||
ToolResp bool
|
||||
FuncName string
|
||||
ToolID string
|
||||
Reasoning string // For models that send reasoning separately (OpenRouter, etc.)
|
||||
}
|
||||
|
||||
type TextContentPart struct {
|
||||
|
||||
@@ -32,10 +32,16 @@ type OpenRouterChatReq struct {
|
||||
MinP float32 `json:"min_p"`
|
||||
NPredict int32 `json:"max_tokens"`
|
||||
Tools []Tool `json:"tools"`
|
||||
Reasoning *ReasoningConfig `json:"reasoning,omitempty"`
|
||||
}
|
||||
|
||||
func NewOpenRouterChatReq(cb ChatBody, props map[string]float32) OpenRouterChatReq {
|
||||
return OpenRouterChatReq{
|
||||
// ReasoningConfig is the payload of the optional "reasoning" field of
// OpenRouterChatReq. Both fields are tagged omitempty, so a field left as
// the empty string is not written to the encoded JSON.
type ReasoningConfig struct {
|
||||
Effort string `json:"effort,omitempty"` // xhigh, high, medium, low, minimal, none
|
||||
Summary string `json:"summary,omitempty"` // auto, concise, detailed
|
||||
}
|
||||
|
||||
func NewOpenRouterChatReq(cb ChatBody, props map[string]float32, reasoningEffort string) OpenRouterChatReq {
|
||||
req := OpenRouterChatReq{
|
||||
Messages: cb.Messages,
|
||||
Model: cb.Model,
|
||||
Stream: cb.Stream,
|
||||
@@ -43,6 +49,13 @@ func NewOpenRouterChatReq(cb ChatBody, props map[string]float32) OpenRouterChatR
|
||||
MinP: props["min_p"],
|
||||
NPredict: int32(props["n_predict"]),
|
||||
}
|
||||
// Only include reasoning config if effort is specified and not "none"
|
||||
if reasoningEffort != "" && reasoningEffort != "none" {
|
||||
req.Reasoning = &ReasoningConfig{
|
||||
Effort: reasoningEffort,
|
||||
}
|
||||
}
|
||||
return req
|
||||
}
|
||||
|
||||
type OpenRouterChatRespNonStream struct {
|
||||
@@ -82,6 +95,7 @@ type OpenRouterChatResp struct {
|
||||
Delta struct {
|
||||
Role string `json:"role"`
|
||||
Content string `json:"content"`
|
||||
Reasoning string `json:"reasoning"`
|
||||
ToolCalls []ToolDeltaResp `json:"tool_calls"`
|
||||
} `json:"delta"`
|
||||
FinishReason string `json:"finish_reason"`
|
||||
|
||||
Reference in New Issue
Block a user