init

2025-07-31 10:46:07 +03:00
commit 757e948659
7 changed files with 410 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,4 @@
 data
 grailbench
 *.json
 config.toml
--- a/config/config.go
+++ b/config/config.go
@@ -0,0 +1,29 @@
 package config
 import (
 	"fmt"
 	"github.com/BurntSushi/toml"
 )
 type Config struct {
 	CurrentAPI    string `toml:"CurrentAPI"`
 	APIToken      string `toml:"APIToken"`
 	QuestionsPath string `toml:"QuestionsPath"`
 	OutPath       string `toml:"OutPath"`
 }
 func LoadConfigOrDefault(fn string) *Config {
 	if fn == "" {
 		fn = "config.toml"
 	}
 	config := &Config{}
 	_, err := toml.DecodeFile(fn, &config)
 	if err != nil {
 		fmt.Println("failed to read config from file, loading default", "error", err)
 		config.CurrentAPI = "http://localhost:8080/completion"
 		config.QuestionsPath = "data/questions.json"
 		config.OutPath = "data/out.json"
 	}
 	return config
 }
--- a/go.mod
+++ b/go.mod
@@ -0,0 +1,5 @@
 module grailbench
 go 1.24.5
 require github.com/BurntSushi/toml v1.5.0
--- a/go.sum
+++ b/go.sum
@@ -0,0 +1,2 @@
 github.com/BurntSushi/toml v1.5.0 h1:W5quZX/G/csjUnuI8SUYlsHs9M38FC7znL0lIO+DvMg=
 github.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
--- a/main.go
+++ b/main.go
@@ -0,0 +1,168 @@
 package main
 import (
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io"
 	"log/slog"
 	"net/http"
 	"os"
 	"time"
 	"grailbench/config"
 )
 var (
 	logger       *slog.Logger
 	cfg          *config.Config
 	httpClient   = &http.Client{}
 	currentModel = ""
 )
 type Question struct {
 	ID       string `json:"id"`
 	Topic    string `json:"topic"`
 	Question string `json:"question"`
 }
 type Answer struct {
 	Q      Question
 	Answer string `json:"answer"`
 	Model  string `json:"model"`
 	// resp time?
 }
 func loadQuestions(fp string) ([]Question, error) {
 	data, err := os.ReadFile(fp)
 	if err != nil {
 		logger.Error("failed to read file", "error", err, "fp", fp)
 		return nil, err
 	}
 	resp := []Question{}
 	if err := json.Unmarshal(data, &resp); err != nil {
 		logger.Error("failed to unmarshal file", "error", err, "fp", fp)
 		return nil, err
 	}
 	return resp, nil
 }
 func chooseParser() RespParser {
 	return NewLCPRespParser(logger)
 }
 func init() {
 	logger = slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{
 		Level:     slog.LevelDebug,
 		AddSource: true,
 	}))
 	cfg = config.LoadConfigOrDefault("config.toml")
 }
 func main() {
 	questions, err := loadQuestions(cfg.QuestionsPath)
 	if err != nil {
 		panic(err)
 	}
 	answers, err := runBench(questions)
 	if err != nil {
 		panic(err)
 	}
 	data, err := json.MarshalIndent(answers, " ", " ")
 	if err != nil {
 		panic(err)
 	}
 	if err := os.WriteFile(cfg.OutPath, data, 0666); err != nil {
 		panic(err)
 	}
 }
 func runBench(questions []Question) ([]Answer, error) {
 	answers := []Answer{}
 	for _, q := range questions {
 		resp, err := callLLM(buildPrompt(q.Question))
 		if err != nil {
 			// log
 			panic(err)
 		}
 		parser := chooseParser()
 		respText, err := parser.ParseBytes(resp)
 		if err != nil {
 			panic(err)
 		}
 		a := Answer{Q: q, Answer: respText, Model: currentModel}
 		answers = append(answers, a)
 	}
 	return answers, nil
 }
 // openai vs completion
 func buildPrompt(q string) string {
 	// sure injection?
 	// completion
 	return fmt.Sprintf(`Q:\n%s\nA:\nSure,`, q)
 }
 func callLLM(prompt string) ([]byte, error) {
 	method := "POST"
 	// Generate the payload once as bytes
 	parser := chooseParser()
 	payloadReader := parser.MakePayload(prompt)
 	client := &http.Client{}
 	maxRetries := 6
 	baseDelay := 2 // seconds
 	for attempt := 0; attempt < maxRetries; attempt++ {
 		// Create a new request for the attempt
 		req, err := http.NewRequest(method, cfg.CurrentAPI, payloadReader)
 		if err != nil {
 			if attempt == maxRetries-1 {
 				return nil, fmt.Errorf("LLM call failed after %d retries on request creation: %w", maxRetries, err)
 			}
 			logger.Error("failed to make new request; will retry", "error", err, "url", cfg.CurrentAPI, "attempt", attempt)
 			time.Sleep(time.Duration(baseDelay) * time.Second * time.Duration(attempt+1))
 			continue
 		}
 		req.Header.Add("Content-Type", "application/json")
 		req.Header.Add("Accept", "application/json")
 		req.Header.Add("Authorization", "Bearer "+cfg.APIToken)
 		resp, err := client.Do(req)
 		if err != nil {
 			if attempt == maxRetries-1 {
 				return nil, fmt.Errorf("LLM call failed after %d retries on client.Do: %w", maxRetries, err)
 			}
 			logger.Error("http request failed; will retry", "error", err, "url", cfg.CurrentAPI, "attempt", attempt)
 			delay := time.Duration(baseDelay*(attempt+1)) * time.Second
 			time.Sleep(delay)
 			continue
 		}
 		body, err := io.ReadAll(resp.Body)
 		resp.Body.Close()
 		if err != nil {
 			if attempt == maxRetries-1 {
 				return nil, fmt.Errorf("LLM call failed after %d retries on reading body: %w", maxRetries, err)
 			}
 			logger.Error("failed to read response body; will retry", "error", err, "url", cfg.CurrentAPI, "attempt", attempt)
 			delay := time.Duration(baseDelay*(attempt+1)) * time.Second
 			time.Sleep(delay)
 			continue
 		}
 		// Check status code
 		if resp.StatusCode >= 400 && resp.StatusCode < 600 {
 			if attempt == maxRetries-1 {
 				return nil, fmt.Errorf("LLM call failed after %d retries, got status %d", maxRetries, resp.StatusCode)
 			}
 			logger.Warn("retriable status code; will retry", "code", resp.StatusCode, "attempt", attempt)
 			delay := time.Duration((baseDelay * (1 << attempt))) * time.Second
 			time.Sleep(delay)
 			continue
 		}
 		if resp.StatusCode != http.StatusOK {
 			// For non-retriable errors, return immediately
 			return nil, fmt.Errorf("non-retriable status %d, body: %s", resp.StatusCode, string(body))
 		}
 		// Success
 		logger.Debug("llm resp", "body", string(body), "url", cfg.CurrentAPI, "attempt", attempt)
 		return body, nil
 	}
 	return nil, errors.New("unknown error in retry loop")
 }
--- a/models.go
+++ b/models.go
@@ -0,0 +1,56 @@
 package main
 type OpenRouterResp struct {
 	ID       string `json:"id"`
 	Provider string `json:"provider"`
 	Model    string `json:"model"`
 	Object   string `json:"object"`
 	Created  int    `json:"created"`
 	Choices  []struct {
 		Logprobs           any    `json:"logprobs"`
 		FinishReason       string `json:"finish_reason"`
 		NativeFinishReason string `json:"native_finish_reason"`
 		Index              int    `json:"index"`
 		Message            struct {
 			Role      string `json:"role"`
 			Content   string `json:"content"`
 			Refusal   any    `json:"refusal"`
 			Reasoning any    `json:"reasoning"`
 		} `json:"message"`
 	} `json:"choices"`
 	Usage struct {
 		PromptTokens     int `json:"prompt_tokens"`
 		CompletionTokens int `json:"completion_tokens"`
 		TotalTokens      int `json:"total_tokens"`
 	} `json:"usage"`
 }
 type DSResp struct {
 	ID      string `json:"id"`
 	Choices []struct {
 		Text         string `json:"text"`
 		Index        int    `json:"index"`
 		FinishReason string `json:"finish_reason"`
 	} `json:"choices"`
 	Created           int    `json:"created"`
 	Model             string `json:"model"`
 	SystemFingerprint string `json:"system_fingerprint"`
 	Object            string `json:"object"`
 }
 type LLMResp struct {
 	Index           int    `json:"index"`
 	Content         string `json:"content"`
 	Tokens          []any  `json:"tokens"`
 	IDSlot          int    `json:"id_slot"`
 	Stop            bool   `json:"stop"`
 	Model           string `json:"model"`
 	TokensPredicted int    `json:"tokens_predicted"`
 	TokensEvaluated int    `json:"tokens_evaluated"`
 	Prompt          string `json:"prompt"`
 	HasNewLine      bool   `json:"has_new_line"`
 	Truncated       bool   `json:"truncated"`
 	StopType        string `json:"stop_type"`
 	StoppingWord    string `json:"stopping_word"`
 	TokensCached    int    `json:"tokens_cached"`
 }
--- a/parser.go
+++ b/parser.go
@@ -0,0 +1,146 @@
 package main
 import (
 	"encoding/json"
 	"errors"
 	"fmt"
 	"io"
 	"log/slog"
 	"strings"
 )
 type RespParser interface {
 	ParseBytes(body []byte) (string, error)
 	MakePayload(prompt string) io.Reader
 }
 // DeepSeekParser: deepseek implementation of RespParser
 type deepSeekParser struct {
 	log *slog.Logger
 }
 func NewDeepSeekParser(log *slog.Logger) *deepSeekParser {
 	return &deepSeekParser{log: log}
 }
 func (p *deepSeekParser) ParseBytes(body []byte) (string, error) {
 	// parsing logic here
 	dsResp := DSResp{}
 	if err := json.Unmarshal(body, &dsResp); err != nil {
 		p.log.Error("failed to unmarshall", "error", err)
 		return "", err
 	}
 	if len(dsResp.Choices) == 0 {
 		p.log.Error("empty choices", "dsResp", dsResp)
 		err := errors.New("empty choices in dsResp")
 		return "", err
 	}
 	text := dsResp.Choices[0].Text
 	return text, nil
 }
 func (p *deepSeekParser) MakePayload(prompt string) io.Reader {
 	return strings.NewReader(fmt.Sprintf(`{
 	  "model": "deepseek-chat",
 	  "prompt": "%s",
 	  "echo": false,
 	  "frequency_penalty": 0,
 	  "logprobs": 0,
 	  "max_tokens": 1024,
 	  "presence_penalty": 0,
 	  "stop": null,
 	  "stream": false,
 	  "stream_options": null,
 	  "suffix": null,
 	  "temperature": 1,
 	  "top_p": 1
 	}`, prompt))
 }
 // llama.cpp implementation of RespParser
 type lcpRespParser struct {
 	log *slog.Logger
 }
 func NewLCPRespParser(log *slog.Logger) *lcpRespParser {
 	return &lcpRespParser{log: log}
 }
 func (p *lcpRespParser) ParseBytes(body []byte) (string, error) {
 	// parsing logic here
 	resp := LLMResp{}
 	if err := json.Unmarshal(body, &resp); err != nil {
 		p.log.Error("failed to unmarshal", "error", err)
 		return "", err
 	}
 	return resp.Content, nil
 }
 func (p *lcpRespParser) MakePayload(prompt string) io.Reader {
 	return strings.NewReader(fmt.Sprintf(`{
 	  "model": "local-model",
 	  "prompt": "%s",
 	  "frequency_penalty": 0,
 	  "max_tokens": 1024,
 	  "stop": ["Q:\n", "A:\n"],
 	  "stream": false,
 	  "temperature": 0.4,
 	  "top_p": 1
 	}`, prompt))
 }
 type openRouterParser struct {
 	log        *slog.Logger
 	modelIndex uint32
 }
 func NewOpenRouterParser(log *slog.Logger) *openRouterParser {
 	return &openRouterParser{
 		log:        log,
 		modelIndex: 0,
 	}
 }
 func (p *openRouterParser) ParseBytes(body []byte) (string, error) {
 	// parsing logic here
 	resp := OpenRouterResp{}
 	if err := json.Unmarshal(body, &resp); err != nil {
 		p.log.Error("failed to unmarshal", "error", err)
 		return "", err
 	}
 	if len(resp.Choices) == 0 {
 		p.log.Error("empty choices", "resp", resp)
 		err := errors.New("empty choices in resp")
 		return "", err
 	}
 	text := resp.Choices[0].Message.Content
 	return text, nil
 }
 func (p *openRouterParser) MakePayload(prompt string) io.Reader {
 	// Models to rotate through
 	models := []string{
 		"google/gemini-2.0-flash-exp:free",
 		"deepseek/deepseek-chat-v3-0324:free",
 		"mistralai/mistral-small-3.2-24b-instruct:free",
 		"qwen/qwen3-14b:free",
 		"deepseek/deepseek-r1:free",
 		"google/gemma-3-27b-it:free",
 		"meta-llama/llama-3.3-70b-instruct:free",
 	}
 	// Get next model index using atomic addition for thread safety
 	p.modelIndex++
 	model := models[int(p.modelIndex)%len(models)]
 	strPayload := fmt.Sprintf(`{
 	"model": "%s",
 	"max_tokens": 300,
 	"messages": [
 		{
 		"role": "user",
 		"content": "%s"
 		}
 	]
 	}`, model, prompt)
 	p.log.Debug("made openrouter payload", "model", model, "payload", strPayload)
 	return strings.NewReader(strPayload)
 }
		`@@ -0,0 +1,2 @@`
							`github.com/BurntSushi/toml v1.5.0 h1:W5quZX/G/csjUnuI8SUYlsHs9M38FC7znL0lIO+DvMg=`
							`github.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=`