Compare commits
3 Commits
850ca103e5
...
ef53e9bebe
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ef53e9bebe | ||
|
|
a546bfe596 | ||
|
|
23c21f87bb |
65
bot.go
65
bot.go
@@ -46,6 +46,7 @@ var (
|
|||||||
ragger *rag.RAG
|
ragger *rag.RAG
|
||||||
chunkParser ChunkParser
|
chunkParser ChunkParser
|
||||||
lastToolCall *models.FuncCall
|
lastToolCall *models.FuncCall
|
||||||
|
lastRespStats *models.ResponseStats
|
||||||
//nolint:unused // TTS_ENABLED conditionally uses this
|
//nolint:unused // TTS_ENABLED conditionally uses this
|
||||||
orator Orator
|
orator Orator
|
||||||
asr STT
|
asr STT
|
||||||
@@ -484,30 +485,28 @@ func monitorModelLoad(modelID string) {
|
|||||||
// extractDetailedErrorFromBytes extracts detailed error information from response body bytes
|
// extractDetailedErrorFromBytes extracts detailed error information from response body bytes
|
||||||
func extractDetailedErrorFromBytes(body []byte, statusCode int) string {
|
func extractDetailedErrorFromBytes(body []byte, statusCode int) string {
|
||||||
// Try to parse as JSON to extract detailed error information
|
// Try to parse as JSON to extract detailed error information
|
||||||
var errorResponse map[string]interface{}
|
var errorResponse map[string]any
|
||||||
if err := json.Unmarshal(body, &errorResponse); err == nil {
|
if err := json.Unmarshal(body, &errorResponse); err == nil {
|
||||||
// Check if it's an error response with detailed information
|
// Check if it's an error response with detailed information
|
||||||
if errorData, ok := errorResponse["error"]; ok {
|
if errorData, ok := errorResponse["error"]; ok {
|
||||||
if errorMap, ok := errorData.(map[string]interface{}); ok {
|
if errorMap, ok := errorData.(map[string]any); ok {
|
||||||
var errorMsg string
|
var errorMsg string
|
||||||
if msg, ok := errorMap["message"]; ok {
|
if msg, ok := errorMap["message"]; ok {
|
||||||
errorMsg = fmt.Sprintf("%v", msg)
|
errorMsg = fmt.Sprintf("%v", msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
var details []string
|
var details []string
|
||||||
if code, ok := errorMap["code"]; ok {
|
if code, ok := errorMap["code"]; ok {
|
||||||
details = append(details, fmt.Sprintf("Code: %v", code))
|
details = append(details, fmt.Sprintf("Code: %v", code))
|
||||||
}
|
}
|
||||||
|
|
||||||
if metadata, ok := errorMap["metadata"]; ok {
|
if metadata, ok := errorMap["metadata"]; ok {
|
||||||
// Handle metadata which might contain raw error details
|
// Handle metadata which might contain raw error details
|
||||||
if metadataMap, ok := metadata.(map[string]interface{}); ok {
|
if metadataMap, ok := metadata.(map[string]any); ok {
|
||||||
if raw, ok := metadataMap["raw"]; ok {
|
if raw, ok := metadataMap["raw"]; ok {
|
||||||
// Parse the raw error string if it's JSON
|
// Parse the raw error string if it's JSON
|
||||||
var rawError map[string]interface{}
|
var rawError map[string]any
|
||||||
if rawStr, ok := raw.(string); ok && json.Unmarshal([]byte(rawStr), &rawError) == nil {
|
if rawStr, ok := raw.(string); ok && json.Unmarshal([]byte(rawStr), &rawError) == nil {
|
||||||
if rawErrorData, ok := rawError["error"]; ok {
|
if rawErrorData, ok := rawError["error"]; ok {
|
||||||
if rawErrorMap, ok := rawErrorData.(map[string]interface{}); ok {
|
if rawErrorMap, ok := rawErrorData.(map[string]any); ok {
|
||||||
if rawMsg, ok := rawErrorMap["message"]; ok {
|
if rawMsg, ok := rawErrorMap["message"]; ok {
|
||||||
return fmt.Sprintf("API Error: %s", rawMsg)
|
return fmt.Sprintf("API Error: %s", rawMsg)
|
||||||
}
|
}
|
||||||
@@ -518,20 +517,30 @@ func extractDetailedErrorFromBytes(body []byte, statusCode int) string {
|
|||||||
}
|
}
|
||||||
details = append(details, fmt.Sprintf("Metadata: %v", metadata))
|
details = append(details, fmt.Sprintf("Metadata: %v", metadata))
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(details) > 0 {
|
if len(details) > 0 {
|
||||||
return fmt.Sprintf("API Error: %s (%s)", errorMsg, strings.Join(details, ", "))
|
return fmt.Sprintf("API Error: %s (%s)", errorMsg, strings.Join(details, ", "))
|
||||||
}
|
}
|
||||||
|
|
||||||
return "API Error: " + errorMsg
|
return "API Error: " + errorMsg
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If not a structured error response, return the raw body with status
|
// If not a structured error response, return the raw body with status
|
||||||
return fmt.Sprintf("HTTP Status: %d, Response Body: %s", statusCode, string(body))
|
return fmt.Sprintf("HTTP Status: %d, Response Body: %s", statusCode, string(body))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func finalizeRespStats(tokenCount int, startTime time.Time) {
|
||||||
|
duration := time.Since(startTime).Seconds()
|
||||||
|
var tps float64
|
||||||
|
if duration > 0 {
|
||||||
|
tps = float64(tokenCount) / duration
|
||||||
|
}
|
||||||
|
lastRespStats = &models.ResponseStats{
|
||||||
|
Tokens: tokenCount,
|
||||||
|
Duration: duration,
|
||||||
|
TokensPerSec: tps,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// sendMsgToLLM expects streaming resp
|
// sendMsgToLLM expects streaming resp
|
||||||
func sendMsgToLLM(body io.Reader) {
|
func sendMsgToLLM(body io.Reader) {
|
||||||
choseChunkParser()
|
choseChunkParser()
|
||||||
@@ -586,12 +595,17 @@ func sendMsgToLLM(body io.Reader) {
|
|||||||
streamDone <- true
|
streamDone <- true
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
//
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
reader := bufio.NewReader(resp.Body)
|
reader := bufio.NewReader(resp.Body)
|
||||||
counter := uint32(0)
|
counter := uint32(0)
|
||||||
|
tokenCount := 0
|
||||||
|
startTime := time.Now()
|
||||||
hasReasoning := false
|
hasReasoning := false
|
||||||
reasoningSent := false
|
reasoningSent := false
|
||||||
|
defer func() {
|
||||||
|
finalizeRespStats(tokenCount, startTime)
|
||||||
|
}()
|
||||||
for {
|
for {
|
||||||
var (
|
var (
|
||||||
answerText string
|
answerText string
|
||||||
@@ -667,11 +681,13 @@ func sendMsgToLLM(body io.Reader) {
|
|||||||
// Close the thinking block if we were streaming reasoning and haven't closed it yet
|
// Close the thinking block if we were streaming reasoning and haven't closed it yet
|
||||||
if hasReasoning && !reasoningSent {
|
if hasReasoning && !reasoningSent {
|
||||||
chunkChan <- "</think>"
|
chunkChan <- "</think>"
|
||||||
|
tokenCount++
|
||||||
}
|
}
|
||||||
if chunk.Chunk != "" {
|
if chunk.Chunk != "" {
|
||||||
logger.Warn("text inside of finish llmchunk", "chunk", chunk, "counter", counter)
|
logger.Warn("text inside of finish llmchunk", "chunk", chunk, "counter", counter)
|
||||||
answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
|
answerText = strings.ReplaceAll(chunk.Chunk, "\n\n", "\n")
|
||||||
chunkChan <- answerText
|
chunkChan <- answerText
|
||||||
|
tokenCount++
|
||||||
}
|
}
|
||||||
streamDone <- true
|
streamDone <- true
|
||||||
break
|
break
|
||||||
@@ -684,12 +700,14 @@ func sendMsgToLLM(body io.Reader) {
|
|||||||
if !hasReasoning {
|
if !hasReasoning {
|
||||||
// First reasoning chunk - send opening tag
|
// First reasoning chunk - send opening tag
|
||||||
chunkChan <- "<think>"
|
chunkChan <- "<think>"
|
||||||
|
tokenCount++
|
||||||
hasReasoning = true
|
hasReasoning = true
|
||||||
}
|
}
|
||||||
// Stream reasoning content immediately
|
// Stream reasoning content immediately
|
||||||
answerText = strings.ReplaceAll(chunk.Reasoning, "\n\n", "\n")
|
answerText = strings.ReplaceAll(chunk.Reasoning, "\n\n", "\n")
|
||||||
if answerText != "" {
|
if answerText != "" {
|
||||||
chunkChan <- answerText
|
chunkChan <- answerText
|
||||||
|
tokenCount++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -697,6 +715,7 @@ func sendMsgToLLM(body io.Reader) {
|
|||||||
if chunk.Chunk != "" && hasReasoning && !reasoningSent {
|
if chunk.Chunk != "" && hasReasoning && !reasoningSent {
|
||||||
// Close the thinking block before sending actual content
|
// Close the thinking block before sending actual content
|
||||||
chunkChan <- "</think>"
|
chunkChan <- "</think>"
|
||||||
|
tokenCount++
|
||||||
reasoningSent = true
|
reasoningSent = true
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -709,9 +728,11 @@ func sendMsgToLLM(body io.Reader) {
|
|||||||
slices.Contains(stopStrings, answerText) {
|
slices.Contains(stopStrings, answerText) {
|
||||||
logger.Debug("stop string detected on client side for completion endpoint", "stop_string", answerText)
|
logger.Debug("stop string detected on client side for completion endpoint", "stop_string", answerText)
|
||||||
streamDone <- true
|
streamDone <- true
|
||||||
|
break
|
||||||
}
|
}
|
||||||
if answerText != "" {
|
if answerText != "" {
|
||||||
chunkChan <- answerText
|
chunkChan <- answerText
|
||||||
|
tokenCount++
|
||||||
}
|
}
|
||||||
openAIToolChan <- chunk.ToolChunk
|
openAIToolChan <- chunk.ToolChunk
|
||||||
if chunk.FuncName != "" {
|
if chunk.FuncName != "" {
|
||||||
@@ -914,7 +935,6 @@ out:
|
|||||||
textView.ScrollToEnd()
|
textView.ScrollToEnd()
|
||||||
}
|
}
|
||||||
case <-streamDone:
|
case <-streamDone:
|
||||||
// drain any remaining chunks from chunkChan before exiting
|
|
||||||
for len(chunkChan) > 0 {
|
for len(chunkChan) > 0 {
|
||||||
chunk := <-chunkChan
|
chunk := <-chunkChan
|
||||||
fmt.Fprint(textView, chunk)
|
fmt.Fprint(textView, chunk)
|
||||||
@@ -923,31 +943,40 @@ out:
|
|||||||
textView.ScrollToEnd()
|
textView.ScrollToEnd()
|
||||||
}
|
}
|
||||||
if cfg.TTS_ENABLED {
|
if cfg.TTS_ENABLED {
|
||||||
// Send chunk to audio stream handler
|
|
||||||
TTSTextChan <- chunk
|
TTSTextChan <- chunk
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if cfg.TTS_ENABLED {
|
if cfg.TTS_ENABLED {
|
||||||
// msg is done; flush it down
|
|
||||||
TTSFlushChan <- true
|
TTSFlushChan <- true
|
||||||
}
|
}
|
||||||
break out
|
break out
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
var msgStats *models.ResponseStats
|
||||||
|
if lastRespStats != nil {
|
||||||
|
msgStats = &models.ResponseStats{
|
||||||
|
Tokens: lastRespStats.Tokens,
|
||||||
|
Duration: lastRespStats.Duration,
|
||||||
|
TokensPerSec: lastRespStats.TokensPerSec,
|
||||||
|
}
|
||||||
|
lastRespStats = nil
|
||||||
|
}
|
||||||
botRespMode = false
|
botRespMode = false
|
||||||
// numbers in chatbody and displayed must be the same
|
|
||||||
if r.Resume {
|
if r.Resume {
|
||||||
chatBody.Messages[len(chatBody.Messages)-1].Content += respText.String()
|
chatBody.Messages[len(chatBody.Messages)-1].Content += respText.String()
|
||||||
// lastM.Content = lastM.Content + respText.String()
|
|
||||||
// Process the updated message to check for known_to tags in resumed response
|
|
||||||
updatedMsg := chatBody.Messages[len(chatBody.Messages)-1]
|
updatedMsg := chatBody.Messages[len(chatBody.Messages)-1]
|
||||||
processedMsg := processMessageTag(&updatedMsg)
|
processedMsg := processMessageTag(&updatedMsg)
|
||||||
chatBody.Messages[len(chatBody.Messages)-1] = *processedMsg
|
chatBody.Messages[len(chatBody.Messages)-1] = *processedMsg
|
||||||
|
if msgStats != nil && chatBody.Messages[len(chatBody.Messages)-1].Role != cfg.ToolRole {
|
||||||
|
chatBody.Messages[len(chatBody.Messages)-1].Stats = msgStats
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// Message was already added at the start, just process it for known_to tags
|
|
||||||
chatBody.Messages[msgIdx].Content = respText.String()
|
chatBody.Messages[msgIdx].Content = respText.String()
|
||||||
processedMsg := processMessageTag(&chatBody.Messages[msgIdx])
|
processedMsg := processMessageTag(&chatBody.Messages[msgIdx])
|
||||||
chatBody.Messages[msgIdx] = *processedMsg
|
chatBody.Messages[msgIdx] = *processedMsg
|
||||||
|
if msgStats != nil && chatBody.Messages[msgIdx].Role != cfg.ToolRole {
|
||||||
|
chatBody.Messages[msgIdx].Stats = msgStats
|
||||||
|
}
|
||||||
stopTTSIfNotForUser(&chatBody.Messages[msgIdx])
|
stopTTSIfNotForUser(&chatBody.Messages[msgIdx])
|
||||||
}
|
}
|
||||||
cleanChatBody()
|
cleanChatBody()
|
||||||
|
|||||||
@@ -110,6 +110,7 @@ type RoleMsg struct {
|
|||||||
ContentParts []any `json:"-"`
|
ContentParts []any `json:"-"`
|
||||||
ToolCallID string `json:"tool_call_id,omitempty"` // For tool response messages
|
ToolCallID string `json:"tool_call_id,omitempty"` // For tool response messages
|
||||||
KnownTo []string `json:"known_to,omitempty"`
|
KnownTo []string `json:"known_to,omitempty"`
|
||||||
|
Stats *ResponseStats `json:"stats"`
|
||||||
hasContentParts bool // Flag to indicate which content type to marshal
|
hasContentParts bool // Flag to indicate which content type to marshal
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -183,13 +184,11 @@ func (m *RoleMsg) UnmarshalJSON(data []byte) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m *RoleMsg) ToText(i int) string {
|
func (m *RoleMsg) ToText(i int) string {
|
||||||
// Convert content to string representation
|
|
||||||
var contentStr string
|
var contentStr string
|
||||||
var imageIndicators []string
|
var imageIndicators []string
|
||||||
if !m.hasContentParts {
|
if !m.hasContentParts {
|
||||||
contentStr = m.Content
|
contentStr = m.Content
|
||||||
} else {
|
} else {
|
||||||
// For structured content, collect text parts and image indicators
|
|
||||||
var textParts []string
|
var textParts []string
|
||||||
for _, part := range m.ContentParts {
|
for _, part := range m.ContentParts {
|
||||||
switch p := part.(type) {
|
switch p := part.(type) {
|
||||||
@@ -198,7 +197,6 @@ func (m *RoleMsg) ToText(i int) string {
|
|||||||
textParts = append(textParts, p.Text)
|
textParts = append(textParts, p.Text)
|
||||||
}
|
}
|
||||||
case ImageContentPart:
|
case ImageContentPart:
|
||||||
// Collect image indicator
|
|
||||||
displayPath := p.Path
|
displayPath := p.Path
|
||||||
if displayPath == "" {
|
if displayPath == "" {
|
||||||
displayPath = "image"
|
displayPath = "image"
|
||||||
@@ -216,7 +214,6 @@ func (m *RoleMsg) ToText(i int) string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
case "image_url":
|
case "image_url":
|
||||||
// Handle unmarshaled image content
|
|
||||||
var displayPath string
|
var displayPath string
|
||||||
if pathVal, pathExists := p["path"]; pathExists {
|
if pathVal, pathExists := p["path"]; pathExists {
|
||||||
if pathStr, isStr := pathVal.(string); isStr && pathStr != "" {
|
if pathStr, isStr := pathVal.(string); isStr && pathStr != "" {
|
||||||
@@ -233,23 +230,20 @@ func (m *RoleMsg) ToText(i int) string {
|
|||||||
}
|
}
|
||||||
contentStr = strings.Join(textParts, " ") + " "
|
contentStr = strings.Join(textParts, " ") + " "
|
||||||
}
|
}
|
||||||
// check if already has role annotation (/completion makes them)
|
|
||||||
// in that case remove it, and then add to icon
|
|
||||||
// since icon and content are separated by \n
|
|
||||||
contentStr, _ = strings.CutPrefix(contentStr, m.Role+":")
|
contentStr, _ = strings.CutPrefix(contentStr, m.Role+":")
|
||||||
// if !strings.HasPrefix(contentStr, m.Role+":") {
|
|
||||||
icon := fmt.Sprintf("(%d) <%s>: ", i, m.Role)
|
icon := fmt.Sprintf("(%d) <%s>: ", i, m.Role)
|
||||||
// }
|
|
||||||
// Build final message with image indicators before text
|
|
||||||
var finalContent strings.Builder
|
var finalContent strings.Builder
|
||||||
if len(imageIndicators) > 0 {
|
if len(imageIndicators) > 0 {
|
||||||
// Add each image indicator on its own line
|
|
||||||
for _, indicator := range imageIndicators {
|
for _, indicator := range imageIndicators {
|
||||||
finalContent.WriteString(indicator)
|
finalContent.WriteString(indicator)
|
||||||
finalContent.WriteString("\n")
|
finalContent.WriteString("\n")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
finalContent.WriteString(contentStr)
|
finalContent.WriteString(contentStr)
|
||||||
|
if m.Stats != nil {
|
||||||
|
finalContent.WriteString(fmt.Sprintf("\n[gray::i][%d tok, %.1fs, %.1f t/s][-:-:-]",
|
||||||
|
m.Stats.Tokens, m.Stats.Duration, m.Stats.TokensPerSec))
|
||||||
|
}
|
||||||
textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, finalContent.String())
|
textMsg := fmt.Sprintf("[-:-:b]%s[-:-:-]\n%s\n", icon, finalContent.String())
|
||||||
return strings.ReplaceAll(textMsg, "\n\n", "\n")
|
return strings.ReplaceAll(textMsg, "\n\n", "\n")
|
||||||
}
|
}
|
||||||
@@ -331,6 +325,7 @@ func (m *RoleMsg) Copy() RoleMsg {
|
|||||||
ContentParts: m.ContentParts,
|
ContentParts: m.ContentParts,
|
||||||
ToolCallID: m.ToolCallID,
|
ToolCallID: m.ToolCallID,
|
||||||
KnownTo: m.KnownTo,
|
KnownTo: m.KnownTo,
|
||||||
|
Stats: m.Stats,
|
||||||
hasContentParts: m.hasContentParts,
|
hasContentParts: m.hasContentParts,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -643,6 +638,12 @@ func (lcp *LCPModels) ListModels() []string {
|
|||||||
return resp
|
return resp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type ResponseStats struct {
|
||||||
|
Tokens int
|
||||||
|
Duration float64
|
||||||
|
TokensPerSec float64
|
||||||
|
}
|
||||||
|
|
||||||
type ChatRoundReq struct {
|
type ChatRoundReq struct {
|
||||||
UserMsg string
|
UserMsg string
|
||||||
Role string
|
Role string
|
||||||
|
|||||||
Reference in New Issue
Block a user