proxy

package
v0.183.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 11, 2026 License: MIT Imports: 33 Imported by: 0

Documentation

Index

Constants

View Source
const ChatCompletionStatsEventID = 0x02
View Source
const ConfigFileChangedEventID = 0x03
View Source
const LogDataEventID = 0x04
View Source
const ModelPreloadedEventID = 0x06
View Source
const (
	PROFILE_SPLIT_CHAR = ":"
)
View Source
const ProcessStateChangeEventID = 0x01
View Source
const TokenMetricsEventID = 0x05

Variables

View Source
var (
	ErrExpectedStateMismatch  = errors.New("expected state mismatch")
	ErrInvalidStateTransition = errors.New("invalid state transition")
)

Custom error types for swapping state.

Functions

func GetReactFS

func GetReactFS() (http.FileSystem, error)

GetReactFS returns the embedded React filesystem

func GetReactIndexHTML

func GetReactIndexHTML() ([]byte, error)

GetReactIndexHTML returns the main index.html for the React app

func SanitizeAccessControlRequestHeaderValues

func SanitizeAccessControlRequestHeaderValues(headerValues string) string

Types

type ChatCompletionStats

type ChatCompletionStats struct {
	TokensGenerated int
}

func (ChatCompletionStats) Type

func (e ChatCompletionStats) Type() uint32

type ConfigFileChangedEvent

type ConfigFileChangedEvent struct {
	ReloadingState ReloadingState
}

func (ConfigFileChangedEvent) Type

func (e ConfigFileChangedEvent) Type() uint32

type DiscardWriter

type DiscardWriter struct {
	// contains filtered or unexported fields
}

Custom discard writer that implements http.ResponseWriter but just discards everything

func (*DiscardWriter) Flush

func (w *DiscardWriter) Flush()

Satisfy the http.Flusher interface for streaming responses

func (*DiscardWriter) Header

func (w *DiscardWriter) Header() http.Header

func (*DiscardWriter) Write

func (w *DiscardWriter) Write(data []byte) (int, error)

func (*DiscardWriter) WriteHeader

func (w *DiscardWriter) WriteHeader(code int)

type LogDataEvent

type LogDataEvent struct {
	Data []byte
}

func (LogDataEvent) Type

func (e LogDataEvent) Type() uint32

type LogLevel

type LogLevel int
const (
	LevelDebug LogLevel = iota
	LevelInfo
	LevelWarn
	LevelError

	LogBufferSize = 100 * 1024
)

func (LogLevel) String

func (l LogLevel) String() string

type LogMonitor

type LogMonitor struct {
	// contains filtered or unexported fields
}

func NewLogMonitor

func NewLogMonitor() *LogMonitor

func NewLogMonitorWriter

func NewLogMonitorWriter(stdout io.Writer) *LogMonitor

func (*LogMonitor) Clear

func (w *LogMonitor) Clear()

Clear releases the buffer memory, making it eligible for GC. The buffer will be lazily re-allocated on the next Write.

func (*LogMonitor) Debug

func (w *LogMonitor) Debug(msg string)

func (*LogMonitor) Debugf

func (w *LogMonitor) Debugf(format string, args ...interface{})

func (*LogMonitor) Error

func (w *LogMonitor) Error(msg string)

func (*LogMonitor) Errorf

func (w *LogMonitor) Errorf(format string, args ...interface{})

func (*LogMonitor) GetHistory

func (w *LogMonitor) GetHistory() []byte

func (*LogMonitor) Info

func (w *LogMonitor) Info(msg string)

func (*LogMonitor) Infof

func (w *LogMonitor) Infof(format string, args ...interface{})

func (*LogMonitor) OnLogData

func (w *LogMonitor) OnLogData(callback func(data []byte)) context.CancelFunc

func (*LogMonitor) SetLogLevel

func (w *LogMonitor) SetLogLevel(level LogLevel)

func (*LogMonitor) SetLogTimeFormat

func (w *LogMonitor) SetLogTimeFormat(timeFormat string)

func (*LogMonitor) SetPrefix

func (w *LogMonitor) SetPrefix(prefix string)

func (*LogMonitor) Warn

func (w *LogMonitor) Warn(msg string)

func (*LogMonitor) Warnf

func (w *LogMonitor) Warnf(format string, args ...interface{})

func (*LogMonitor) Write

func (w *LogMonitor) Write(p []byte) (n int, err error)

type Model

type Model struct {
	Id          string `json:"id"`
	Name        string `json:"name"`
	Description string `json:"description"`
	State       string `json:"state"`
	Unlisted    bool   `json:"unlisted"`
	PeerID      string `json:"peerID"`
}

type ModelPreloadedEvent

type ModelPreloadedEvent struct {
	ModelName string
	Success   bool
}

func (ModelPreloadedEvent) Type

func (e ModelPreloadedEvent) Type() uint32

type PeerProxy

type PeerProxy struct {
	// contains filtered or unexported fields
}

func NewPeerProxy

func NewPeerProxy(peers config.PeerDictionaryConfig, proxyLogger *LogMonitor) (*PeerProxy, error)

func (*PeerProxy) HasPeerModel

func (p *PeerProxy) HasPeerModel(modelID string) bool

func (*PeerProxy) ListPeers

func (p *PeerProxy) ListPeers() config.PeerDictionaryConfig

func (*PeerProxy) ProxyRequest

func (p *PeerProxy) ProxyRequest(model_id string, writer http.ResponseWriter, request *http.Request) error

type Process

type Process struct {
	ID string
	// contains filtered or unexported fields
}

func NewProcess

func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, processLogger *LogMonitor, proxyLogger *LogMonitor) *Process

func (*Process) CurrentState

func (p *Process) CurrentState() ProcessState

func (*Process) LogMonitor

func (p *Process) LogMonitor() *LogMonitor

LogMonitor returns the log monitor associated with the process.

func (*Process) Logger

func (p *Process) Logger() *LogMonitor

Logger returns the logger for this process.

func (*Process) ProxyRequest

func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request)

func (*Process) Shutdown

func (p *Process) Shutdown()

Shutdown is called when llama-swap is shutting down. It gives any in-flight requests a little time to complete before shutting down. If the Process is still starting, the start is cancelled and the process is shut down. Once a process is in the StateShutdown state, it cannot be started again.

func (*Process) Stop

func (p *Process) Stop()

Stop will wait for inflight requests to complete before stopping the process.

func (*Process) StopImmediately

func (p *Process) StopImmediately()

StopImmediately will transition the process to the stopping state and stop the process with a SIGTERM. If the process does not stop within the specified timeout, it will be forcefully stopped with a SIGKILL.

type ProcessGroup

type ProcessGroup struct {
	sync.Mutex
	// contains filtered or unexported fields
}

func NewProcessGroup

func NewProcessGroup(id string, config config.Config, proxyLogger *LogMonitor, upstreamLogger *LogMonitor) *ProcessGroup

func (*ProcessGroup) GetMember

func (pg *ProcessGroup) GetMember(modelName string) (*Process, bool)

func (*ProcessGroup) HasMember

func (pg *ProcessGroup) HasMember(modelName string) bool

func (*ProcessGroup) ProxyRequest

func (pg *ProcessGroup) ProxyRequest(modelID string, writer http.ResponseWriter, request *http.Request) error

ProxyRequest proxies a request to the specified model

func (*ProcessGroup) Shutdown

func (pg *ProcessGroup) Shutdown()

func (*ProcessGroup) StopProcess

func (pg *ProcessGroup) StopProcess(modelID string, strategy StopStrategy) error

func (*ProcessGroup) StopProcesses

func (pg *ProcessGroup) StopProcesses(strategy StopStrategy)

type ProcessState

type ProcessState string
const (
	StateStopped  ProcessState = ProcessState("stopped")
	StateStarting ProcessState = ProcessState("starting")
	StateReady    ProcessState = ProcessState("ready")
	StateStopping ProcessState = ProcessState("stopping")

	// process is shutdown and will not be restarted
	StateShutdown ProcessState = ProcessState("shutdown")
)

type ProcessStateChangeEvent

type ProcessStateChangeEvent struct {
	ProcessName string
	NewState    ProcessState
	OldState    ProcessState
}

func (ProcessStateChangeEvent) Type

func (e ProcessStateChangeEvent) Type() uint32

type ProxyManager

type ProxyManager struct {
	sync.Mutex
	// contains filtered or unexported fields
}

func New

func New(proxyConfig config.Config) *ProxyManager

func (*ProxyManager) ListModelsHandler

func (pm *ProxyManager) ListModelsHandler(c *gin.Context)

func (*ProxyManager) ListRunningProcessesHandler

func (pm *ProxyManager) ListRunningProcessesHandler(context *gin.Context)

func (*ProxyManager) ProxyOAIPostFormHandler

func (pm *ProxyManager) ProxyOAIPostFormHandler(c *gin.Context)

func (*ProxyManager) ProxyToFirstRunningProcess

func (pm *ProxyManager) ProxyToFirstRunningProcess(c *gin.Context)

ProxyToFirstRunningProcess forwards the request to any running process (llama-server).

func (*ProxyManager) ServeHTTP

func (pm *ProxyManager) ServeHTTP(w http.ResponseWriter, r *http.Request)

ServeHTTP implements http.Handler interface

func (*ProxyManager) SetVersion

func (pm *ProxyManager) SetVersion(buildDate string, commit string, version string)

func (*ProxyManager) Shutdown

func (pm *ProxyManager) Shutdown()

Shutdown stops all processes managed by this ProxyManager

func (*ProxyManager) StopProcesses

func (pm *ProxyManager) StopProcesses(strategy StopStrategy)

StopProcesses acquires a lock and stops all running upstream processes. This is the public method safe for concurrent calls. Unlike Shutdown, this method only stops the processes but doesn't perform a complete shutdown, allowing for process replacement without full termination.

func (*ProxyManager) StreamLogsHandler

func (pm *ProxyManager) StreamLogsHandler(c *gin.Context)

func (*ProxyManager) UnloadAllModelsHandler

func (pm *ProxyManager) UnloadAllModelsHandler(c *gin.Context)

type ReloadingState

type ReloadingState int
const (
	ReloadingStateStart ReloadingState = iota
	ReloadingStateEnd
)

type StopStrategy

type StopStrategy int
const (
	StopImmediately StopStrategy = iota
	StopWaitForInflightRequest
)

type TokenMetrics

type TokenMetrics struct {
	ID              int       `json:"id"`
	Timestamp       time.Time `json:"timestamp"`
	Model           string    `json:"model"`
	CachedTokens    int       `json:"cache_tokens"`
	InputTokens     int       `json:"input_tokens"`
	OutputTokens    int       `json:"output_tokens"`
	PromptPerSecond float64   `json:"prompt_per_second"`
	TokensPerSecond float64   `json:"tokens_per_second"`
	DurationMs      int       `json:"duration_ms"`
}

TokenMetrics represents parsed token statistics from llama-server logs

type TokenMetricsEvent

type TokenMetricsEvent struct {
	Metrics TokenMetrics
}

TokenMetricsEvent represents a token metrics event

func (TokenMetricsEvent) Type

func (e TokenMetricsEvent) Type() uint32

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL