// god-eye/internal/ai/ensure.go

package ai

import (
	"bufio"
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"
	"time"
)

// ModelEnsurer verifies that a given list of Ollama models is present on
// the local server, and pulls any that are missing. Designed for the
// pre-scan warmup: God's Eye should not crash mid-scan because a model
// wasn't downloaded — EnsureAll fixes that before the pipeline starts.
//
// NewModelEnsurer fills in BaseURL and Client; Verbose and Writer are left
// at their zero values for the caller to set.
type ModelEnsurer struct {
	BaseURL string       // Ollama server root; endpoint paths such as "/api/tags" are appended to it
	Client  *http.Client // HTTP client used for the streaming pull request
	Verbose bool         // when true, status and progress lines are printed
	Writer  io.Writer    // where progress is printed; if nil, the package-level writer set via SetStdout is used
}

// NewModelEnsurer returns a ModelEnsurer that talks to the Ollama server at
// baseURL. An empty baseURL selects "http://localhost:11434", and trailing
// slashes are stripped so endpoint paths can be appended directly. The HTTP
// client is created without a timeout because a fresh pull of a large model
// can legitimately run for many minutes.
func NewModelEnsurer(baseURL string) *ModelEnsurer {
	root := baseURL
	if root == "" {
		root = "http://localhost:11434"
	}

	ensurer := new(ModelEnsurer)
	ensurer.BaseURL = strings.TrimRight(root, "/")
	ensurer.Client = &http.Client{} // a zero Timeout means "no timeout"
	return ensurer
}

// Installed returns the set of model tags currently available on the
// Ollama server, keyed by the full name (e.g. "qwen3:1.7b").
//
// The request goes through the ensurer's configured Client (falling back to
// http.DefaultClient when Client is nil) so that a custom transport applies
// to this call as well as to Pull. Because that client normally has no
// timeout, the whole exchange — including reading the body — is bounded by
// a 10-second deadline layered on top of ctx.
//
// It returns an error if the request cannot be built or sent, if the server
// answers with a status other than 200, or if the body is not valid JSON.
func (e *ModelEnsurer) Installed(ctx context.Context) (map[string]bool, error) {
	client := e.Client
	if client == nil {
		client = http.DefaultClient
	}

	// The listing is a small, quick call; never let it hang the warmup.
	ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, e.BaseURL+"/api/tags", nil)
	if err != nil {
		return nil, err
	}
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("ollama /api/tags returned %d", resp.StatusCode)
	}

	// Only the model names are needed; every other field is ignored.
	var body struct {
		Models []struct {
			Name string `json:"name"`
		} `json:"models"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&body); err != nil {
		return nil, err
	}
	out := make(map[string]bool, len(body.Models))
	for _, m := range body.Models {
		out[m.Name] = true
	}
	return out, nil
}

// Pull streams a model pull from Ollama, printing progress lines when
// Verbose is true. Uses POST /api/pull with stream=true; each JSON line
// reports status + optional {total, completed} for byte-level progress.
//
// The request is sent with the ensurer's Client, or http.DefaultClient when
// Client is nil (both have no timeout, so a long download is not cut off;
// cancel ctx to abort).
//
// It returns an error if the request cannot be built or sent, if the server
// answers with a status other than 200 (up to 4 KiB of the body is included
// in the message), if any streamed event carries an "error" field, or if
// reading the stream fails. Lines that are blank or not valid JSON are
// skipped.
func (e *ModelEnsurer) Pull(ctx context.Context, model string) error {
	client := e.Client
	if client == nil {
		// A ModelEnsurer built as a struct literal has no Client; avoid a
		// nil-pointer panic by using the default one.
		client = http.DefaultClient
	}

	payload, err := json.Marshal(map[string]interface{}{"name": model, "stream": true})
	if err != nil {
		return fmt.Errorf("encode pull request for %s: %w", model, err)
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, e.BaseURL+"/api/pull", bytes.NewReader(payload))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")
	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body only decorates the error message, so a
		// failed read is deliberately ignored.
		b, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
		return fmt.Errorf("ollama /api/pull returned %d: %s", resp.StatusCode, strings.TrimSpace(string(b)))
	}

	scanner := bufio.NewScanner(resp.Body)
	// Progress events can be large; bump the scanner buffer.
	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)

	var (
		lastStatus string // status text of the most recently printed line
		lastPct    int    // percentage shown on the most recently printed line
	)
	for scanner.Scan() {
		line := bytes.TrimSpace(scanner.Bytes())
		if len(line) == 0 {
			continue
		}
		var ev struct {
			Status    string `json:"status"`
			Digest    string `json:"digest,omitempty"`
			Total     int64  `json:"total,omitempty"`
			Completed int64  `json:"completed,omitempty"`
			Error     string `json:"error,omitempty"`
		}
		if err := json.Unmarshal(line, &ev); err != nil {
			// Tolerate a malformed line rather than abort the download.
			continue
		}
		if ev.Error != "" {
			return fmt.Errorf("pull %s: %s", model, ev.Error)
		}
		if !e.Verbose {
			continue
		}
		w := e.writer()
		if ev.Total > 0 && ev.Completed > 0 {
			pct := int(float64(ev.Completed) / float64(ev.Total) * 100)
			// Throttle: a new status always prints; otherwise print only
			// when the percentage has moved ≥5 points since the last
			// emission, or when it reaches 100% for the first time.
			statusChanged := ev.Status != lastStatus
			advanced := pct >= lastPct+5 && pct < 100
			finished := pct == 100 && lastPct < 100
			if statusChanged || advanced || finished {
				fmt.Fprintf(w, "  %-24s %3d%%  %s / %s\n", ev.Status, pct, humanBytes(ev.Completed), humanBytes(ev.Total))
				lastStatus = ev.Status
				lastPct = pct
			}
		} else if ev.Status != lastStatus {
			// Event without byte counts: print each distinct status once.
			fmt.Fprintf(w, "  %s\n", ev.Status)
			lastStatus = ev.Status
			lastPct = 0
		}
	}
	return scanner.Err()
}

// EnsureAll makes sure every model named in models is available on the
// server. It queries the installed set once, drops blank and duplicate
// names, and pulls each remaining model that is not yet present, one after
// another in input order. It stops at the first failure and returns that
// error; a cancelled ctx is checked before each pull.
//
// Whether a name counts as present is decided by alreadyInstalled, which
// tolerates an implicit ":latest" tag. When Verbose is set, one line is
// printed per skipped, started and finished model.
func (e *ModelEnsurer) EnsureAll(ctx context.Context, models []string) error {
	// say prints a status line, but only in verbose mode.
	say := func(format string, args ...interface{}) {
		if e.Verbose {
			fmt.Fprintf(e.writer(), format, args...)
		}
	}

	installed, err := e.Installed(ctx)
	if err != nil {
		return fmt.Errorf("query ollama: %w", err)
	}

	var toPull []string
	for _, name := range dedup(models) {
		if !alreadyInstalled(installed, name) {
			toPull = append(toPull, name)
			continue
		}
		say("✓ %s already installed\n", name)
	}
	if len(toPull) == 0 {
		return nil
	}

	say("↓ Pulling %d missing model(s): %s\n", len(toPull), strings.Join(toPull, ", "))
	for _, name := range toPull {
		if ctxErr := ctx.Err(); ctxErr != nil {
			return ctxErr
		}
		say("↓ %s\n", name)
		if pullErr := e.Pull(ctx, name); pullErr != nil {
			return fmt.Errorf("pull %s: %w", name, pullErr)
		}
		say("✓ %s ready\n", name)
	}
	return nil
}

// Reachable reports whether the Ollama server answers /api/tags. Callers
// should check this before EnsureAll to surface a friendly message.
//
// The probe uses the ensurer's Client (http.DefaultClient when nil) and is
// bounded by a 3-second deadline layered on top of ctx. It returns nil when
// the server answers 200. If the caller's ctx is already cancelled or
// expired, that context error is returned as-is rather than being reported
// as an unreachable server; any other transport failure yields the friendly
// "not reachable" error, and a non-200 answer yields an error naming the
// status code.
func (e *ModelEnsurer) Reachable(ctx context.Context) error {
	client := e.Client
	if client == nil {
		client = http.DefaultClient
	}

	probeCtx, cancel := context.WithTimeout(ctx, 3*time.Second)
	defer cancel()

	req, err := http.NewRequestWithContext(probeCtx, http.MethodGet, e.BaseURL+"/api/tags", nil)
	if err != nil {
		return err
	}
	resp, err := client.Do(req)
	if err != nil {
		// Check the caller's context, not probeCtx: the probe's own
		// 3-second expiry genuinely means "server did not answer".
		if ctxErr := ctx.Err(); ctxErr != nil {
			return ctxErr
		}
		return errors.New("ollama not reachable at " + e.BaseURL + " (is `ollama serve` running?)")
	}
	resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("ollama at %s returned %d", e.BaseURL, resp.StatusCode)
	}
	return nil
}

// writer returns the destination for progress output: the per-ensurer
// Writer when set, otherwise the package-level writer installed through
// SetStdout, otherwise os.Stdout. It never returns nil, so the result can
// be handed straight to fmt.Fprintf (which panics on a nil io.Writer).
func (e *ModelEnsurer) writer() io.Writer {
	switch {
	case e.Writer != nil:
		return e.Writer
	case stdout != nil:
		return stdout
	default:
		return os.Stdout
	}
}

// stdout is the package-level fallback writer. It stays nil until SetStdout
// is called; while nil, writer() uses os.Stdout instead.
var stdout io.Writer

// SetStdout installs the writer used when ModelEnsurer.Writer is nil.
// Intended for main (to install os.Stdout) and for tests (e.g. a
// bytes.Buffer). Passing nil restores the os.Stdout default.
func SetStdout(w io.Writer) { stdout = w }

// alreadyInstalled reports whether model is present in installed, the set
// of full model names reported by the server.
//
// Ollama treats a name without a tag as shorthand for the ":latest" tag, so
// "llama3" and "llama3:latest" are interchangeable and either form matches
// the other. Any other tag names a distinct build: an installed
// "qwen3:latest" does NOT satisfy a request for "qwen3:1.7b". Reporting it
// as installed would skip the pull and leave the requested model missing
// when the scan later asks for it, so explicitly tagged names must match
// exactly.
func alreadyInstalled(installed map[string]bool, model string) bool {
	if installed[model] {
		return true
	}

	const latest = ":latest"
	// Only the last path segment can carry a tag; a colon earlier in the
	// name belongs to a registry "host:port" prefix.
	leaf := model[strings.LastIndex(model, "/")+1:]
	if !strings.Contains(leaf, ":") {
		// Untagged request: equivalent to asking for ":latest".
		return installed[model+latest]
	}
	if strings.HasSuffix(leaf, latest) {
		// Explicit ":latest": also accept the bare name.
		return installed[strings.TrimSuffix(model, latest)]
	}
	return false
}

// dedup returns the entries of ss with surrounding whitespace trimmed,
// blank entries dropped, and repeated values removed. The first occurrence
// of each value wins and the original order is kept. The result is always
// a non-nil slice, even for empty input.
func dedup(ss []string) []string {
	result := make([]string, 0, len(ss))
	visited := make(map[string]struct{}, len(ss))
	for i := range ss {
		name := strings.TrimSpace(ss[i])
		if name == "" {
			continue
		}
		if _, dup := visited[name]; !dup {
			visited[name] = struct{}{}
			result = append(result, name)
		}
	}
	return result
}

// humanBytes formats a byte count for display using 1024-based units.
// Values below 1024 are printed as a whole number of bytes ("512B");
// larger values are scaled to KB, MB, GB or TB with one decimal place, and
// anything at or beyond 1024 TB is expressed in PB.
func humanBytes(n int64) string {
	const unit = 1024.0
	if n < int64(unit) {
		return fmt.Sprintf("%dB", n)
	}

	suffixes := [...]string{"KB", "MB", "GB", "TB"}
	scaled := float64(n) / unit
	idx := 0
	// Keep dividing until the value fits under one unit or suffixes run out.
	for idx < len(suffixes) && scaled >= unit {
		scaled /= unit
		idx++
	}
	if idx == len(suffixes) {
		return fmt.Sprintf("%.1fPB", scaled)
	}
	return fmt.Sprintf("%.1f%s", scaled, suffixes[idx])
}