Documentation
¶
Index ¶
- Constants
- Variables
- func GetReactFS() (http.FileSystem, error)
- func GetReactIndexHTML() ([]byte, error)
- func SanitizeAccessControlRequestHeaderValues(headerValues string) string
- type ChatCompletionStats
- type ConfigFileChangedEvent
- type DiscardWriter
- type LogDataEvent
- type LogLevel
- type LogMonitor
- func (w *LogMonitor) Clear()
- func (w *LogMonitor) Debug(msg string)
- func (w *LogMonitor) Debugf(format string, args ...interface{})
- func (w *LogMonitor) Error(msg string)
- func (w *LogMonitor) Errorf(format string, args ...interface{})
- func (w *LogMonitor) GetHistory() []byte
- func (w *LogMonitor) Info(msg string)
- func (w *LogMonitor) Infof(format string, args ...interface{})
- func (w *LogMonitor) OnLogData(callback func(data []byte)) context.CancelFunc
- func (w *LogMonitor) SetLogLevel(level LogLevel)
- func (w *LogMonitor) SetLogTimeFormat(timeFormat string)
- func (w *LogMonitor) SetPrefix(prefix string)
- func (w *LogMonitor) Warn(msg string)
- func (w *LogMonitor) Warnf(format string, args ...interface{})
- func (w *LogMonitor) Write(p []byte) (n int, err error)
- type Model
- type ModelPreloadedEvent
- type PeerProxy
- type Process
- type ProcessGroup
- func (pg *ProcessGroup) GetMember(modelName string) (*Process, bool)
- func (pg *ProcessGroup) HasMember(modelName string) bool
- func (pg *ProcessGroup) ProxyRequest(modelID string, writer http.ResponseWriter, request *http.Request) error
- func (pg *ProcessGroup) Shutdown()
- func (pg *ProcessGroup) StopProcess(modelID string, strategy StopStrategy) error
- func (pg *ProcessGroup) StopProcesses(strategy StopStrategy)
- type ProcessState
- type ProcessStateChangeEvent
- type ProxyManager
- func (pm *ProxyManager) ListModelsHandler(c *gin.Context)
- func (pm *ProxyManager) ListRunningProcessesHandler(context *gin.Context)
- func (pm *ProxyManager) ProxyOAIPostFormHandler(c *gin.Context)
- func (pm *ProxyManager) ProxyToFirstRunningProcess(c *gin.Context)
- func (pm *ProxyManager) ServeHTTP(w http.ResponseWriter, r *http.Request)
- func (pm *ProxyManager) SetVersion(buildDate string, commit string, version string)
- func (pm *ProxyManager) Shutdown()
- func (pm *ProxyManager) StopProcesses(strategy StopStrategy)
- func (pm *ProxyManager) StreamLogsHandler(c *gin.Context)
- func (pm *ProxyManager) UnloadAllModelsHandler(c *gin.Context)
- type ReloadingState
- type StopStrategy
- type TokenMetrics
- type TokenMetricsEvent
Constants ¶
const ChatCompletionStatsEventID = 0x02
const ConfigFileChangedEventID = 0x03
const LogDataEventID = 0x04
const ModelPreloadedEventID = 0x06
const (
PROFILE_SPLIT_CHAR = ":"
)
const ProcessStateChangeEventID = 0x01
const TokenMetricsEventID = 0x05
Variables ¶
var (
ErrExpectedStateMismatch = errors.New("expected state mismatch")
ErrInvalidStateTransition = errors.New("invalid state transition")
)
Custom error values for swapping state.
Functions ¶
func GetReactFS ¶
func GetReactFS() (http.FileSystem, error)
GetReactFS returns the embedded React filesystem
func GetReactIndexHTML ¶
func GetReactIndexHTML() ([]byte, error)
GetReactIndexHTML returns the main index.html for the React app
Types ¶
type ChatCompletionStats ¶
type ChatCompletionStats struct {
TokensGenerated int
}
func (ChatCompletionStats) Type ¶
func (e ChatCompletionStats) Type() uint32
type ConfigFileChangedEvent ¶
type ConfigFileChangedEvent struct {
ReloadingState ReloadingState
}
func (ConfigFileChangedEvent) Type ¶
func (e ConfigFileChangedEvent) Type() uint32
type DiscardWriter ¶
type DiscardWriter struct {
// contains filtered or unexported fields
}
Custom discard writer that implements http.ResponseWriter but just discards everything
func (*DiscardWriter) Flush ¶
func (w *DiscardWriter) Flush()
Satisfy the http.Flusher interface for streaming responses
func (*DiscardWriter) Header ¶
func (w *DiscardWriter) Header() http.Header
func (*DiscardWriter) WriteHeader ¶
func (w *DiscardWriter) WriteHeader(code int)
type LogDataEvent ¶
type LogDataEvent struct {
Data []byte
}
func (LogDataEvent) Type ¶
func (e LogDataEvent) Type() uint32
type LogMonitor ¶
type LogMonitor struct {
// contains filtered or unexported fields
}
func NewLogMonitor ¶
func NewLogMonitor() *LogMonitor
func NewLogMonitorWriter ¶
func NewLogMonitorWriter(stdout io.Writer) *LogMonitor
func (*LogMonitor) Clear ¶
func (w *LogMonitor) Clear()
Clear releases the buffer memory, making it eligible for GC. The buffer will be lazily re-allocated on the next Write.
func (*LogMonitor) Debug ¶
func (w *LogMonitor) Debug(msg string)
func (*LogMonitor) Debugf ¶
func (w *LogMonitor) Debugf(format string, args ...interface{})
func (*LogMonitor) Error ¶
func (w *LogMonitor) Error(msg string)
func (*LogMonitor) Errorf ¶
func (w *LogMonitor) Errorf(format string, args ...interface{})
func (*LogMonitor) GetHistory ¶
func (w *LogMonitor) GetHistory() []byte
func (*LogMonitor) Info ¶
func (w *LogMonitor) Info(msg string)
func (*LogMonitor) Infof ¶
func (w *LogMonitor) Infof(format string, args ...interface{})
func (*LogMonitor) OnLogData ¶
func (w *LogMonitor) OnLogData(callback func(data []byte)) context.CancelFunc
func (*LogMonitor) SetLogLevel ¶
func (w *LogMonitor) SetLogLevel(level LogLevel)
func (*LogMonitor) SetLogTimeFormat ¶
func (w *LogMonitor) SetLogTimeFormat(timeFormat string)
func (*LogMonitor) SetPrefix ¶
func (w *LogMonitor) SetPrefix(prefix string)
func (*LogMonitor) Warn ¶
func (w *LogMonitor) Warn(msg string)
func (*LogMonitor) Warnf ¶
func (w *LogMonitor) Warnf(format string, args ...interface{})
type ModelPreloadedEvent ¶
func (ModelPreloadedEvent) Type ¶
func (e ModelPreloadedEvent) Type() uint32
type PeerProxy ¶
type PeerProxy struct {
// contains filtered or unexported fields
}
func NewPeerProxy ¶
func NewPeerProxy(peers config.PeerDictionaryConfig, proxyLogger *LogMonitor) (*PeerProxy, error)
func (*PeerProxy) HasPeerModel ¶
func (*PeerProxy) ListPeers ¶
func (p *PeerProxy) ListPeers() config.PeerDictionaryConfig
func (*PeerProxy) ProxyRequest ¶
type Process ¶
type Process struct {
ID string
// contains filtered or unexported fields
}
func NewProcess ¶
func NewProcess(ID string, healthCheckTimeout int, config config.ModelConfig, processLogger *LogMonitor, proxyLogger *LogMonitor) *Process
func (*Process) CurrentState ¶
func (p *Process) CurrentState() ProcessState
func (*Process) LogMonitor ¶
func (p *Process) LogMonitor() *LogMonitor
LogMonitor returns the log monitor associated with the process.
func (*Process) Logger ¶
func (p *Process) Logger() *LogMonitor
Logger returns the logger for this process.
func (*Process) ProxyRequest ¶
func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request)
func (*Process) Shutdown ¶
func (p *Process) Shutdown()
Shutdown is called when llama-swap is shutting down. It will give a little bit of time for any inflight requests to complete before shutting down. If the Process is in the state of starting, it will cancel it and shut it down. Once a process is in the StateShutdown state, it cannot be started again.
func (*Process) Stop ¶
func (p *Process) Stop()
Stop will wait for inflight requests to complete before stopping the process.
func (*Process) StopImmediately ¶
func (p *Process) StopImmediately()
StopImmediately will transition the process to the stopping state and stop the process with a SIGTERM. If the process does not stop within the specified timeout, it will be forcefully stopped with a SIGKILL.
type ProcessGroup ¶
func NewProcessGroup ¶
func NewProcessGroup(id string, config config.Config, proxyLogger *LogMonitor, upstreamLogger *LogMonitor) *ProcessGroup
func (*ProcessGroup) GetMember ¶
func (pg *ProcessGroup) GetMember(modelName string) (*Process, bool)
func (*ProcessGroup) HasMember ¶
func (pg *ProcessGroup) HasMember(modelName string) bool
func (*ProcessGroup) ProxyRequest ¶
func (pg *ProcessGroup) ProxyRequest(modelID string, writer http.ResponseWriter, request *http.Request) error
ProxyRequest proxies a request to the specified model
func (*ProcessGroup) Shutdown ¶
func (pg *ProcessGroup) Shutdown()
func (*ProcessGroup) StopProcess ¶
func (pg *ProcessGroup) StopProcess(modelID string, strategy StopStrategy) error
func (*ProcessGroup) StopProcesses ¶
func (pg *ProcessGroup) StopProcesses(strategy StopStrategy)
type ProcessState ¶
type ProcessState string
const (
StateStopped ProcessState = ProcessState("stopped")
StateStarting ProcessState = ProcessState("starting")
StateReady ProcessState = ProcessState("ready")
StateStopping ProcessState = ProcessState("stopping")
// process is shutdown and will not be restarted
StateShutdown ProcessState = ProcessState("shutdown")
)
type ProcessStateChangeEvent ¶
type ProcessStateChangeEvent struct {
ProcessName string
NewState ProcessState
OldState ProcessState
}
func (ProcessStateChangeEvent) Type ¶
func (e ProcessStateChangeEvent) Type() uint32
type ProxyManager ¶
func New ¶
func New(proxyConfig config.Config) *ProxyManager
func (*ProxyManager) ListModelsHandler ¶
func (pm *ProxyManager) ListModelsHandler(c *gin.Context)
func (*ProxyManager) ListRunningProcessesHandler ¶
func (pm *ProxyManager) ListRunningProcessesHandler(context *gin.Context)
func (*ProxyManager) ProxyOAIPostFormHandler ¶
func (pm *ProxyManager) ProxyOAIPostFormHandler(c *gin.Context)
func (*ProxyManager) ProxyToFirstRunningProcess ¶
func (pm *ProxyManager) ProxyToFirstRunningProcess(c *gin.Context)
ProxyToFirstRunningProcess forwards the request to any running process (llama-server)
func (*ProxyManager) ServeHTTP ¶
func (pm *ProxyManager) ServeHTTP(w http.ResponseWriter, r *http.Request)
ServeHTTP implements http.Handler interface
func (*ProxyManager) SetVersion ¶
func (pm *ProxyManager) SetVersion(buildDate string, commit string, version string)
func (*ProxyManager) Shutdown ¶
func (pm *ProxyManager) Shutdown()
Shutdown stops all processes managed by this ProxyManager
func (*ProxyManager) StopProcesses ¶
func (pm *ProxyManager) StopProcesses(strategy StopStrategy)
StopProcesses acquires a lock and stops all running upstream processes. This is the public method and is safe for concurrent calls. Unlike Shutdown, this method only stops the processes but doesn't perform a complete shutdown, allowing for process replacement without full termination.
func (*ProxyManager) StreamLogsHandler ¶
func (pm *ProxyManager) StreamLogsHandler(c *gin.Context)
func (*ProxyManager) UnloadAllModelsHandler ¶
func (pm *ProxyManager) UnloadAllModelsHandler(c *gin.Context)
type ReloadingState ¶
type ReloadingState int
const (
ReloadingStateStart ReloadingState = iota
ReloadingStateEnd
)
type StopStrategy ¶
type StopStrategy int
const (
StopImmediately StopStrategy = iota
StopWaitForInflightRequest
)
type TokenMetrics ¶
type TokenMetrics struct {
ID int `json:"id"`
Timestamp time.Time `json:"timestamp"`
Model string `json:"model"`
CachedTokens int `json:"cache_tokens"`
InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"`
PromptPerSecond float64 `json:"prompt_per_second"`
TokensPerSecond float64 `json:"tokens_per_second"`
DurationMs int `json:"duration_ms"`
}
TokenMetrics represents parsed token statistics from llama-server logs
type TokenMetricsEvent ¶
type TokenMetricsEvent struct {
Metrics TokenMetrics
}
TokenMetricsEvent represents a token metrics event
func (TokenMetricsEvent) Type ¶
func (e TokenMetricsEvent) Type() uint32