Documentation ¶
Index ¶
- Constants
- Variables
- func EstimatePromptTokens(model string, messages []provider.Message) (int, error)
- func GetModelContextWindow(model string) int
- func IsNonRetryableError(err error) bool
- func IsRetryableError(err error) bool
- type APIError
- type CacheConfig
- type CacheEntry
- type CacheHitError
- type CacheManager
- func (m *CacheManager) BuildCacheKey(req *provider.ChatCompletionRequest) string
- func (m *CacheManager) Config() CacheConfig
- func (m *CacheManager) Delete(ctx context.Context, req *provider.ChatCompletionRequest) error
- func (m *CacheManager) Get(ctx context.Context, req *provider.ChatCompletionRequest) (*CacheEntry, error)
- func (m *CacheManager) Set(ctx context.Context, req *provider.ChatCompletionRequest, ...) error
- func (m *CacheManager) ShouldCache(req *provider.ChatCompletionRequest) bool
- type CacheStats
- type ChatClient
- func (c *ChatClient) AppendMessage(ctx context.Context, sessionID string, message provider.Message) error
- func (c *ChatClient) Cache() *CacheManager
- func (c *ChatClient) Close() error
- func (c *ChatClient) CreateChatCompletion(ctx context.Context, req *provider.ChatCompletionRequest) (*provider.ChatCompletionResponse, error)
- func (c *ChatClient) CreateChatCompletionStream(ctx context.Context, req *provider.ChatCompletionRequest) (provider.ChatCompletionStream, error)
- func (c *ChatClient) CreateChatCompletionStreamWithMemory(ctx context.Context, sessionID string, req *provider.ChatCompletionRequest) (provider.ChatCompletionStream, error)
- func (c *ChatClient) CreateChatCompletionWithMemory(ctx context.Context, sessionID string, req *provider.ChatCompletionRequest) (*provider.ChatCompletionResponse, error)
- func (c *ChatClient) CreateConversationWithSystemMessage(ctx context.Context, sessionID, systemMessage string) error
- func (c *ChatClient) DeleteConversation(ctx context.Context, sessionID string) error
- func (c *ChatClient) GetConversationMessages(ctx context.Context, sessionID string) ([]provider.Message, error)
- func (c *ChatClient) HasCache() bool
- func (c *ChatClient) HasMemory() bool
- func (c *ChatClient) LoadConversation(ctx context.Context, sessionID string) (*ConversationMemory, error)
- func (c *ChatClient) Logger() *slog.Logger
- func (c *ChatClient) Memory() *MemoryManager
- func (c *ChatClient) Provider() provider.Provider
- func (c *ChatClient) SaveConversation(ctx context.Context, conversation *ConversationMemory) error
- func (c *ChatClient) TokenEstimator() TokenEstimator
- type ChatCompletionChoice
- type ChatCompletionChunk
- type ChatCompletionRequest
- type ChatCompletionResponse
- type ChatCompletionStream
- type CircuitBreaker
- type CircuitBreakerConfig
- type CircuitBreakerStats
- type CircuitOpenError
- type CircuitState
- type ClientConfig
- type ConversationMemory
- type ErrorCategory
- type FallbackAttempt
- type FallbackError
- type FallbackProvider
- func (fp *FallbackProvider) CircuitBreaker(providerName string) *CircuitBreaker
- func (fp *FallbackProvider) Close() error
- func (fp *FallbackProvider) CreateChatCompletion(ctx context.Context, req *provider.ChatCompletionRequest) (*provider.ChatCompletionResponse, error)
- func (fp *FallbackProvider) CreateChatCompletionStream(ctx context.Context, req *provider.ChatCompletionRequest) (provider.ChatCompletionStream, error)
- func (fp *FallbackProvider) FallbackProviders() []provider.Provider
- func (fp *FallbackProvider) Name() string
- func (fp *FallbackProvider) PrimaryProvider() provider.Provider
- type FallbackProviderConfig
- type LLMCallInfo
- type MemoryConfig
- type MemoryManager
- func (m *MemoryManager) AppendMessage(ctx context.Context, sessionID string, message Message) error
- func (m *MemoryManager) AppendMessages(ctx context.Context, sessionID string, messages []Message) error
- func (m *MemoryManager) CreateConversationWithSystemMessage(ctx context.Context, sessionID, systemMessage string) error
- func (m *MemoryManager) DeleteConversation(ctx context.Context, sessionID string) error
- func (m *MemoryManager) GetMessages(ctx context.Context, sessionID string) ([]Message, error)
- func (m *MemoryManager) LoadConversation(ctx context.Context, sessionID string) (*ConversationMemory, error)
- func (m *MemoryManager) SaveConversation(ctx context.Context, conversation *ConversationMemory) error
- func (m *MemoryManager) SetMetadata(ctx context.Context, sessionID string, metadata map[string]any) error
- type Message
- type ModelInfo
- type ObservabilityHook
- type Provider
- type ProviderConfig
- type ProviderName
- type Role
- type TokenEstimator
- type TokenEstimatorConfig
- type TokenLimitError
- type TokenValidation
- type Tool
- type ToolCall
- type ToolFunction
- type ToolSpec
- type Usage
Constants ¶
const (
	EnvVarAnthropicAPIKey = "ANTHROPIC_API_KEY" // #nosec G101
	EnvVarOpenAIAPIKey    = "OPENAI_API_KEY"    // #nosec G101
	EnvVarGeminiAPIKey    = "GEMINI_API_KEY"    // #nosec G101
	EnvVarXAIAPIKey       = "XAI_API_KEY"       // #nosec G101
)
const (
	// Bedrock Models - Re-exported from models package
	ModelBedrockClaude3Opus   = models.BedrockClaude3Opus
	ModelBedrockClaude3Sonnet = models.BedrockClaude3Sonnet
	ModelBedrockClaudeOpus4   = models.BedrockClaudeOpus4
	ModelBedrockTitan         = models.BedrockTitan

	// Claude Models - Re-exported from models package
	ModelClaudeOpus4_1   = models.ClaudeOpus4_1
	ModelClaudeOpus4     = models.ClaudeOpus4
	ModelClaudeSonnet4   = models.ClaudeSonnet4
	ModelClaude3_7Sonnet = models.Claude3_7Sonnet
	ModelClaude3_5Haiku  = models.Claude3_5Haiku
	ModelClaude3Opus     = models.Claude3Opus
	ModelClaude3Sonnet   = models.Claude3Sonnet
	ModelClaude3Haiku    = models.Claude3Haiku

	// Gemini Models - Re-exported from models package
	ModelGemini2_5Pro       = models.Gemini2_5Pro
	ModelGemini2_5Flash     = models.Gemini2_5Flash
	ModelGeminiLive2_5Flash = models.GeminiLive2_5Flash
	ModelGemini1_5Pro       = models.Gemini1_5Pro
	ModelGemini1_5Flash     = models.Gemini1_5Flash
	ModelGeminiPro          = models.GeminiPro

	// Ollama Models - Re-exported from models package
	ModelOllamaLlama3_8B   = models.OllamaLlama3_8B
	ModelOllamaLlama3_70B  = models.OllamaLlama3_70B
	ModelOllamaMistral7B   = models.OllamaMistral7B
	ModelOllamaMixtral8x7B = models.OllamaMixtral8x7B
	ModelOllamaCodeLlama   = models.OllamaCodeLlama
	ModelOllamaGemma2B     = models.OllamaGemma2B
	ModelOllamaGemma7B     = models.OllamaGemma7B
	ModelOllamaQwen2_5     = models.OllamaQwen2_5
	ModelOllamaDeepSeek    = models.OllamaDeepSeek

	// OpenAI Models - Re-exported from models package
	ModelGPT5           = models.GPT5
	ModelGPT5Mini       = models.GPT5Mini
	ModelGPT5Nano       = models.GPT5Nano
	ModelGPT5ChatLatest = models.GPT5ChatLatest
	ModelGPT4_1         = models.GPT4_1
	ModelGPT4_1Mini     = models.GPT4_1Mini
	ModelGPT4_1Nano     = models.GPT4_1Nano
	ModelGPT4o          = models.GPT4o
	ModelGPT4oMini      = models.GPT4oMini
	ModelGPT4Turbo      = models.GPT4Turbo
	ModelGPT35Turbo     = models.GPT35Turbo

	// Vertex AI Models - Re-exported from models package
	ModelVertexClaudeOpus4 = models.VertexClaudeOpus4

	// X.AI Grok Models - Re-exported from models package
	// Grok 4.1 (Latest - November 2025)
	ModelGrok4_1FastReasoning    = models.Grok4_1FastReasoning
	ModelGrok4_1FastNonReasoning = models.Grok4_1FastNonReasoning
	// Grok 4 (July 2025)
	ModelGrok4_0709            = models.Grok4_0709
	ModelGrok4FastReasoning    = models.Grok4FastReasoning
	ModelGrok4FastNonReasoning = models.Grok4FastNonReasoning
	ModelGrokCodeFast1         = models.GrokCodeFast1
	// Grok 3
	ModelGrok3     = models.Grok3
	ModelGrok3Mini = models.Grok3Mini
	// Grok 2
	ModelGrok2_1212   = models.Grok2_1212
	ModelGrok2_Vision = models.Grok2_Vision
	// Deprecated models
	ModelGrokBeta   = models.GrokBeta
	ModelGrokVision = models.GrokVision
)
Common model constants for each provider.
NOTE: For new code, prefer importing "github.com/agentplexus/omnillm/models" directly for better organization and documentation. These constants are maintained for backwards compatibility with existing code.
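A minimal sketch of the preferred direct import:

import "github.com/agentplexus/omnillm/models"

model := models.GPT4o // same value as the re-exported omnillm.ModelGPT4o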
const (
	RoleSystem    = provider.RoleSystem
	RoleUser      = provider.RoleUser
	RoleAssistant = provider.RoleAssistant
	RoleTool      = provider.RoleTool
)
Role constants for convenience
Variables ¶
var (
	// Common errors
	ErrUnsupportedProvider  = errors.New("unsupported provider")
	ErrBedrockExternal      = errors.New("bedrock provider moved to github.com/agentplexus/omnillm-bedrock; use CustomProvider to inject it")
	ErrInvalidConfiguration = errors.New("invalid configuration")
	ErrNoProviders          = errors.New("at least one provider must be configured")
	ErrEmptyAPIKey          = errors.New("API key cannot be empty")
	ErrEmptyModel           = errors.New("model cannot be empty")
	ErrEmptyMessages        = errors.New("messages cannot be empty")
	ErrStreamClosed         = errors.New("stream is closed")
	ErrInvalidResponse      = errors.New("invalid response format")
	ErrRateLimitExceeded    = errors.New("rate limit exceeded")
	ErrQuotaExceeded        = errors.New("quota exceeded")
	ErrInvalidRequest       = errors.New("invalid request")
	ErrModelNotFound        = errors.New("model not found")
	ErrServerError          = errors.New("server error")
	ErrNetworkError         = errors.New("network error")
)
Functions ¶
func EstimatePromptTokens ¶ added in v0.11.0
EstimatePromptTokens is a convenience function that creates a default estimator and estimates tokens for a set of messages.
func GetModelContextWindow ¶ added in v0.11.0
GetModelContextWindow is a convenience function that returns the context window for a model using the default estimator.
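For illustration, a minimal sketch of pre-flight estimation (it assumes the package is imported as omnillm and that Message carries Role and Content fields, which this page does not show):

package main

import (
	"fmt"

	"github.com/agentplexus/omnillm"
)

func main() {
	msgs := []omnillm.Message{
		{Role: omnillm.RoleUser, Content: "Summarize the design in one paragraph."},
	}
	// Estimate with the default estimator; the count is approximate.
	tokens, err := omnillm.EstimatePromptTokens(omnillm.ModelGPT4o, msgs)
	if err != nil {
		fmt.Println("estimate failed:", err)
		return
	}
	window := omnillm.GetModelContextWindow(omnillm.ModelGPT4o) // 0 if unknown
	fmt.Printf("~%d of %d tokens used by the prompt\n", tokens, window)
}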
func IsNonRetryableError ¶ added in v0.11.0
IsNonRetryableError returns true if the error is permanent and retrying won't help.
func IsRetryableError ¶ added in v0.11.0
IsRetryableError returns true if the error is transient and the request can be retried. This is useful for fallback provider logic - only retry on retryable errors.
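As a sketch of that fallback logic (client, ctx, and req are assumed to be in scope):

resp, err := client.CreateChatCompletion(ctx, req)
if err != nil && omnillm.IsRetryableError(err) {
	// Transient failure (rate limit, 5xx, network): the same request
	// can safely be retried or handed to a fallback provider.
	resp, err = client.CreateChatCompletion(ctx, req)
}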
Types ¶
type APIError ¶
type APIError struct {
StatusCode int `json:"status_code"`
Message string `json:"message"`
Type string `json:"type"`
Code string `json:"code"`
Provider ProviderName `json:"provider"`
}
APIError represents an error response from the API
func NewAPIError ¶
func NewAPIError(provider ProviderName, statusCode int, message, errorType, code string) *APIError
NewAPIError creates a new API error
type CacheConfig ¶ added in v0.11.0
type CacheConfig struct {
// TTL is the time-to-live for cached responses.
// Default: 1 hour
TTL time.Duration
// KeyPrefix is the prefix for cache keys in the KVS.
// Default: "omnillm:cache"
KeyPrefix string
// SkipStreaming skips caching for streaming requests.
// Default: true (streaming responses are not cached)
SkipStreaming bool
// CacheableModels limits caching to specific models.
// If nil or empty, all models are cached.
CacheableModels []string
// ExcludeParameters lists parameters to exclude from cache key calculation.
// Common exclusions: "user" (user ID shouldn't affect cache)
// Default: ["user"]
ExcludeParameters []string
// IncludeTemperature includes temperature in cache key.
// Set to false if you want to cache regardless of temperature setting.
// Default: true
IncludeTemperature bool
// IncludeSeed includes seed in cache key.
// Default: true
IncludeSeed bool
}
CacheConfig configures response caching behavior
func DefaultCacheConfig ¶ added in v0.11.0
func DefaultCacheConfig() CacheConfig
DefaultCacheConfig returns a CacheConfig with sensible defaults
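For example, a sketch that starts from the defaults and tightens them (field values are illustrative):

cfg := omnillm.DefaultCacheConfig()
cfg.TTL = 15 * time.Minute                             // shorter than the 1-hour default
cfg.CacheableModels = []string{omnillm.ModelGPT4oMini} // cache only this model
cfg.IncludeTemperature = false                         // one cache key regardless of temperature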
type CacheEntry ¶ added in v0.11.0
type CacheEntry struct {
// Response is the cached chat completion response
Response *provider.ChatCompletionResponse `json:"response"`
// CachedAt is when the response was cached
CachedAt time.Time `json:"cached_at"`
// ExpiresAt is when the cache entry expires
ExpiresAt time.Time `json:"expires_at"`
// Model is the model used for the request
Model string `json:"model"`
// RequestHash is the hash of the request (for verification)
RequestHash string `json:"request_hash"`
}
CacheEntry represents a cached response with metadata
func (*CacheEntry) IsExpired ¶ added in v0.11.0
func (e *CacheEntry) IsExpired() bool
IsExpired returns true if the cache entry has expired
type CacheHitError ¶ added in v0.11.0
type CacheHitError struct {
Entry *CacheEntry
}
CacheHitError is a marker type to indicate a cache hit (not an actual error)
func (*CacheHitError) Error ¶ added in v0.11.0
func (e *CacheHitError) Error() string
type CacheManager ¶ added in v0.11.0
type CacheManager struct {
// contains filtered or unexported fields
}
CacheManager handles response caching using a KVS backend
func NewCacheManager ¶ added in v0.11.0
func NewCacheManager(kvsClient kvs.Client, config CacheConfig) *CacheManager
NewCacheManager creates a new cache manager with the given KVS client and configuration. If config has zero values, defaults are used for those fields.
func (*CacheManager) BuildCacheKey ¶ added in v0.11.0
func (m *CacheManager) BuildCacheKey(req *provider.ChatCompletionRequest) string
BuildCacheKey generates a deterministic cache key for a request. The key is a hash of the normalized request parameters.
func (*CacheManager) Config ¶ added in v0.11.0
func (m *CacheManager) Config() CacheConfig
Config returns the cache configuration
func (*CacheManager) Delete ¶ added in v0.11.0
func (m *CacheManager) Delete(ctx context.Context, req *provider.ChatCompletionRequest) error
Delete removes a cache entry for the given request.
func (*CacheManager) Get ¶ added in v0.11.0
func (m *CacheManager) Get(ctx context.Context, req *provider.ChatCompletionRequest) (*CacheEntry, error)
Get retrieves a cached response for the given request. Returns nil if no valid cache entry exists.
func (*CacheManager) Set ¶ added in v0.11.0
func (m *CacheManager) Set(ctx context.Context, req *provider.ChatCompletionRequest, resp *provider.ChatCompletionResponse) error
Set stores a response in the cache for the given request.
func (*CacheManager) ShouldCache ¶ added in v0.11.0
func (m *CacheManager) ShouldCache(req *provider.ChatCompletionRequest) bool
ShouldCache determines if a request should be cached. Returns false for streaming requests (if configured), non-cacheable models, etc.
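A sketch of the read-through pattern these methods support (store is a hypothetical kvs.Client, prov a provider.Provider; error handling abbreviated):

cache := omnillm.NewCacheManager(store, omnillm.DefaultCacheConfig())
if cache.ShouldCache(req) {
	if entry, _ := cache.Get(ctx, req); entry != nil {
		return entry.Response, nil // cache hit
	}
}
resp, err := prov.CreateChatCompletion(ctx, req)
if err == nil && cache.ShouldCache(req) {
	_ = cache.Set(ctx, req, resp) // best-effort write-through
}
return resp, err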
type CacheStats ¶ added in v0.11.0
CacheStats contains statistics about cache usage
type ChatClient ¶
type ChatClient struct {
// contains filtered or unexported fields
}
ChatClient is the main client interface that wraps a Provider
func NewClient ¶
func NewClient(config ClientConfig) (*ChatClient, error)
NewClient creates a new ChatClient based on the provider
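A minimal end-to-end sketch (the ChatCompletionRequest and Message field names are assumed from the provider package; verify against the actual definitions):

package main

import (
	"context"
	"fmt"
	"os"

	"github.com/agentplexus/omnillm"
)

func main() {
	client, err := omnillm.NewClient(omnillm.ClientConfig{
		Providers: []omnillm.ProviderConfig{
			{Provider: omnillm.ProviderNameOpenAI, APIKey: os.Getenv(omnillm.EnvVarOpenAIAPIKey)},
		},
	})
	if err != nil {
		panic(err)
	}
	defer client.Close()

	resp, err := client.CreateChatCompletion(context.Background(), &omnillm.ChatCompletionRequest{
		Model:    omnillm.ModelGPT4oMini,
		Messages: []omnillm.Message{{Role: omnillm.RoleUser, Content: "Hello!"}},
	})
	if err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", resp) // shape follows provider.ChatCompletionResponse
}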
func (*ChatClient) AppendMessage ¶
func (c *ChatClient) AppendMessage(ctx context.Context, sessionID string, message provider.Message) error
AppendMessage appends a message to a conversation in memory
func (*ChatClient) Cache ¶ added in v0.11.0
func (c *ChatClient) Cache() *CacheManager
Cache returns the cache manager (nil if not configured)
func (*ChatClient) CreateChatCompletion ¶
func (c *ChatClient) CreateChatCompletion(ctx context.Context, req *provider.ChatCompletionRequest) (*provider.ChatCompletionResponse, error)
CreateChatCompletion creates a chat completion
func (*ChatClient) CreateChatCompletionStream ¶
func (c *ChatClient) CreateChatCompletionStream(ctx context.Context, req *provider.ChatCompletionRequest) (provider.ChatCompletionStream, error)
CreateChatCompletionStream creates a streaming chat completion
func (*ChatClient) CreateChatCompletionStreamWithMemory ¶
func (c *ChatClient) CreateChatCompletionStreamWithMemory(ctx context.Context, sessionID string, req *provider.ChatCompletionRequest) (provider.ChatCompletionStream, error)
CreateChatCompletionStreamWithMemory creates a streaming chat completion using conversation memory
func (*ChatClient) CreateChatCompletionWithMemory ¶
func (c *ChatClient) CreateChatCompletionWithMemory(ctx context.Context, sessionID string, req *provider.ChatCompletionRequest) (*provider.ChatCompletionResponse, error)
CreateChatCompletionWithMemory creates a chat completion using conversation memory
func (*ChatClient) CreateConversationWithSystemMessage ¶
func (c *ChatClient) CreateConversationWithSystemMessage(ctx context.Context, sessionID, systemMessage string) error
CreateConversationWithSystemMessage creates a new conversation with a system message
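Together these enable a memory-backed exchange; a sketch (assuming the client was created with Memory configured):

sessionID := "session-123"
if err := client.CreateConversationWithSystemMessage(ctx, sessionID, "You are a terse assistant."); err != nil {
	return err
}
// Stored history for sessionID is loaded and prepended automatically,
// and the new exchange is persisted back to the KVS store.
resp, err := client.CreateChatCompletionWithMemory(ctx, sessionID, &omnillm.ChatCompletionRequest{
	Model:    omnillm.ModelGPT4oMini,
	Messages: []omnillm.Message{{Role: omnillm.RoleUser, Content: "Hello again."}},
})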
func (*ChatClient) DeleteConversation ¶
func (c *ChatClient) DeleteConversation(ctx context.Context, sessionID string) error
DeleteConversation removes a conversation from memory
func (*ChatClient) GetConversationMessages ¶
func (c *ChatClient) GetConversationMessages(ctx context.Context, sessionID string) ([]provider.Message, error)
GetConversationMessages retrieves messages from a conversation
func (*ChatClient) HasCache ¶ added in v0.11.0
func (c *ChatClient) HasCache() bool
HasCache returns true if caching is configured
func (*ChatClient) HasMemory ¶
func (c *ChatClient) HasMemory() bool
HasMemory returns true if memory is configured
func (*ChatClient) LoadConversation ¶
func (c *ChatClient) LoadConversation(ctx context.Context, sessionID string) (*ConversationMemory, error)
LoadConversation loads a conversation from memory
func (*ChatClient) Logger ¶
func (c *ChatClient) Logger() *slog.Logger
Logger returns the client's logger
func (*ChatClient) Memory ¶
func (c *ChatClient) Memory() *MemoryManager
Memory returns the memory manager (nil if not configured)
func (*ChatClient) Provider ¶
func (c *ChatClient) Provider() provider.Provider
Provider returns the underlying provider
func (*ChatClient) SaveConversation ¶
func (c *ChatClient) SaveConversation(ctx context.Context, conversation *ConversationMemory) error
SaveConversation saves a conversation to memory
func (*ChatClient) TokenEstimator ¶ added in v0.11.0
func (c *ChatClient) TokenEstimator() TokenEstimator
TokenEstimator returns the token estimator (nil if not configured)
type ChatCompletionChoice ¶
type ChatCompletionChoice = provider.ChatCompletionChoice
type ChatCompletionChunk ¶
type ChatCompletionChunk = provider.ChatCompletionChunk
type ChatCompletionRequest ¶
type ChatCompletionRequest = provider.ChatCompletionRequest
type ChatCompletionResponse ¶
type ChatCompletionResponse = provider.ChatCompletionResponse
type ChatCompletionStream ¶
type ChatCompletionStream = provider.ChatCompletionStream
ChatCompletionStream is an alias to the provider.ChatCompletionStream interface for backward compatibility
type CircuitBreaker ¶ added in v0.11.0
type CircuitBreaker struct {
// contains filtered or unexported fields
}
CircuitBreaker implements the circuit breaker pattern for provider health tracking
func NewCircuitBreaker ¶ added in v0.11.0
func NewCircuitBreaker(config CircuitBreakerConfig) *CircuitBreaker
NewCircuitBreaker creates a new circuit breaker with the given configuration. If config has zero values, defaults are used for those fields.
func (*CircuitBreaker) AllowRequest ¶ added in v0.11.0
func (cb *CircuitBreaker) AllowRequest() bool
AllowRequest returns true if the request should be allowed to proceed. In closed state, always allows. In open state, allows only after timeout. In half-open state, allows a limited number of test requests.
func (*CircuitBreaker) RecordFailure ¶ added in v0.11.0
func (cb *CircuitBreaker) RecordFailure()
RecordFailure records a failed request. May open the circuit if thresholds are exceeded.
func (*CircuitBreaker) RecordSuccess ¶ added in v0.11.0
func (cb *CircuitBreaker) RecordSuccess()
RecordSuccess records a successful request. In the half-open state, it may close the circuit once enough consecutive successes are recorded.
func (*CircuitBreaker) Reset ¶ added in v0.11.0
func (cb *CircuitBreaker) Reset()
Reset resets the circuit breaker to closed state with cleared counters
func (*CircuitBreaker) State ¶ added in v0.11.0
func (cb *CircuitBreaker) State() CircuitState
State returns the current state of the circuit breaker
func (*CircuitBreaker) Stats ¶ added in v0.11.0
func (cb *CircuitBreaker) Stats() CircuitBreakerStats
Stats returns current statistics for monitoring
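A sketch of driving a breaker by hand around a provider call (prov, ctx, and req are assumed in scope; NewClient wires this up automatically when CircuitBreakerConfig is set):

cb := omnillm.NewCircuitBreaker(omnillm.DefaultCircuitBreakerConfig())

if !cb.AllowRequest() {
	return nil, fmt.Errorf("skipping provider, circuit is %s", cb.State())
}
resp, err := prov.CreateChatCompletion(ctx, req)
if err != nil {
	cb.RecordFailure() // may open the circuit
} else {
	cb.RecordSuccess() // may close a half-open circuit
}
return resp, err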
type CircuitBreakerConfig ¶ added in v0.11.0
type CircuitBreakerConfig struct {
// FailureThreshold is the number of consecutive failures before opening the circuit.
// Default: 5
FailureThreshold int
// SuccessThreshold is the number of consecutive successes in half-open state
// required to close the circuit.
// Default: 2
SuccessThreshold int
// Timeout is how long to wait in open state before transitioning to half-open.
// Default: 30 seconds
Timeout time.Duration
// FailureRateThreshold triggers circuit open when the failure rate exceeds this value (0-1).
// Only evaluated after MinimumRequests is reached.
// Default: 0.5 (50%)
FailureRateThreshold float64
// MinimumRequests is the minimum number of requests before failure rate is evaluated.
// Default: 10
MinimumRequests int
}
CircuitBreakerConfig configures circuit breaker behavior
func DefaultCircuitBreakerConfig ¶ added in v0.11.0
func DefaultCircuitBreakerConfig() CircuitBreakerConfig
DefaultCircuitBreakerConfig returns a CircuitBreakerConfig with sensible defaults
type CircuitBreakerStats ¶ added in v0.11.0
type CircuitBreakerStats struct {
State CircuitState
ConsecutiveFailures int
ConsecutiveSuccesses int
TotalRequests int
TotalFailures int
FailureRate float64
LastFailure time.Time
LastStateChange time.Time
}
CircuitBreakerStats contains statistics about the circuit breaker
type CircuitOpenError ¶ added in v0.11.0
type CircuitOpenError struct {
Provider string
State CircuitState
LastFailure time.Time
RetryAfter time.Duration
}
CircuitOpenError is returned when a request is rejected due to open circuit
func (*CircuitOpenError) Error ¶ added in v0.11.0
func (e *CircuitOpenError) Error() string
type CircuitState ¶ added in v0.11.0
type CircuitState int
CircuitState represents the state of a circuit breaker
const (
	// CircuitClosed indicates normal operation - requests pass through
	CircuitClosed CircuitState = iota
	// CircuitOpen indicates the circuit is open - requests fail fast
	CircuitOpen
	// CircuitHalfOpen indicates the circuit is testing recovery
	CircuitHalfOpen
)
func (CircuitState) String ¶ added in v0.11.0
func (s CircuitState) String() string
String returns the string representation of the circuit state
type ClientConfig ¶
type ClientConfig struct {
// Providers is an ordered list of providers. Index 0 is the primary provider,
// and indices 1+ are fallback providers tried in order on retryable errors.
// This is the preferred way to configure providers.
//
// Example:
// Providers: []ProviderConfig{
// {Provider: ProviderNameOpenAI, APIKey: "openai-key"}, // Primary
// {Provider: ProviderNameAnthropic, APIKey: "anthropic-key"}, // Fallback 1
// {Provider: ProviderNameGemini, APIKey: "gemini-key"}, // Fallback 2
// }
//
// For custom providers, use CustomProvider field in ProviderConfig:
// Providers: []ProviderConfig{
// {CustomProvider: myCustomProvider},
// }
Providers []ProviderConfig
// CircuitBreakerConfig configures circuit breaker behavior for fallback providers.
// If nil (default), circuit breaker is disabled.
// When enabled, providers that fail repeatedly are temporarily skipped.
CircuitBreakerConfig *CircuitBreakerConfig
// Memory configuration (optional)
Memory kvs.Client
MemoryConfig *MemoryConfig
// ObservabilityHook is called before/after LLM calls (optional)
ObservabilityHook ObservabilityHook
// Logger for internal logging (optional, defaults to null logger)
Logger *slog.Logger
// TokenEstimator enables pre-flight token estimation (optional).
// Use NewTokenEstimator() to create one with custom configuration.
TokenEstimator TokenEstimator
// ValidateTokens enables automatic token validation before requests.
// When true and TokenEstimator is set, requests that would exceed
// the model's context window are rejected with TokenLimitError.
// Default: false
ValidateTokens bool
// Cache is the KVS client for response caching (optional).
// If provided, identical requests will return cached responses.
// Uses the same kvs.Client interface as Memory.
Cache kvs.Client
// CacheConfig configures response caching behavior.
// If nil, DefaultCacheConfig() is used when Cache is provided.
CacheConfig *CacheConfig
}
ClientConfig holds configuration for creating a client
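A fuller configuration sketch combining fallbacks, circuit breaking, memory, caching, and token validation (store is a hypothetical kvs.Client):

client, err := omnillm.NewClient(omnillm.ClientConfig{
	Providers: []omnillm.ProviderConfig{
		{Provider: omnillm.ProviderNameOpenAI, APIKey: os.Getenv(omnillm.EnvVarOpenAIAPIKey)},
		{Provider: omnillm.ProviderNameAnthropic, APIKey: os.Getenv(omnillm.EnvVarAnthropicAPIKey)},
	},
	CircuitBreakerConfig: &omnillm.CircuitBreakerConfig{FailureThreshold: 3},
	Memory:               store, // conversation persistence
	Cache:                store, // the same backend may serve both roles
	TokenEstimator:       omnillm.NewTokenEstimator(omnillm.DefaultTokenEstimatorConfig()),
	ValidateTokens:       true, // reject over-limit prompts with TokenLimitError
})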
type ConversationMemory ¶
type ConversationMemory struct {
SessionID string `json:"session_id"`
Messages []Message `json:"messages"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
Metadata map[string]any `json:"metadata,omitempty"`
}
ConversationMemory represents stored conversation data
type ErrorCategory ¶ added in v0.11.0
type ErrorCategory int
ErrorCategory classifies errors for retry/fallback logic
const (
	// ErrorCategoryUnknown indicates the error type could not be determined
	ErrorCategoryUnknown ErrorCategory = iota
	// ErrorCategoryRetryable indicates the error is transient and the request can be retried.
	// Examples: rate limits (429), server errors (5xx), network errors
	ErrorCategoryRetryable
	// ErrorCategoryNonRetryable indicates the error is permanent and retrying won't help.
	// Examples: auth errors (401/403), invalid requests (400), not found (404)
	ErrorCategoryNonRetryable
)
func ClassifyError ¶ added in v0.11.0
func ClassifyError(err error) ErrorCategory
ClassifyError determines the category of an error for retry/fallback decisions
func (ErrorCategory) String ¶ added in v0.11.0
func (c ErrorCategory) String() string
String returns the string representation of the error category
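For example, a dispatch on the category (err is assumed in scope):

switch omnillm.ClassifyError(err) {
case omnillm.ErrorCategoryRetryable:
	// back off and retry, or move on to a fallback provider
case omnillm.ErrorCategoryNonRetryable:
	// fail fast: fix credentials or the request before retrying
default: // ErrorCategoryUnknown
	// treat conservatively, e.g. retry at most once
}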
type FallbackAttempt ¶ added in v0.11.0
type FallbackAttempt struct {
// Provider is the name of the provider that was tried
Provider string
// Error is the error returned, or nil on success
Error error
// Duration is how long the attempt took
Duration time.Duration
// Skipped indicates the provider was skipped (e.g., circuit open)
Skipped bool
}
FallbackAttempt records information about a single fallback attempt
type FallbackError ¶ added in v0.11.0
type FallbackError struct {
// Attempts contains information about each provider attempt
Attempts []FallbackAttempt
// LastError is the last error encountered
LastError error
}
FallbackError is returned when all providers fail
func (*FallbackError) Error ¶ added in v0.11.0
func (e *FallbackError) Error() string
func (*FallbackError) Unwrap ¶ added in v0.11.0
func (e *FallbackError) Unwrap() error
type FallbackProvider ¶ added in v0.11.0
type FallbackProvider struct {
// contains filtered or unexported fields
}
FallbackProvider wraps multiple providers with fallback logic. It implements provider.Provider and tries providers in order until one succeeds.
func NewFallbackProvider ¶ added in v0.11.0
func NewFallbackProvider(
	primary provider.Provider,
	fallbacks []provider.Provider,
	config *FallbackProviderConfig,
) *FallbackProvider
NewFallbackProvider creates a provider that tries fallbacks on failure. The primary provider is tried first, then fallbacks in order.
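A sketch of wiring one up directly (primary and backup are hypothetical provider.Provider values; most callers get the same behavior by listing providers in ClientConfig):

fp := omnillm.NewFallbackProvider(primary, []provider.Provider{backup}, &omnillm.FallbackProviderConfig{
	CircuitBreakerConfig: &omnillm.CircuitBreakerConfig{FailureThreshold: 3},
})
defer fp.Close()

resp, err := fp.CreateChatCompletion(ctx, req)
var fe *omnillm.FallbackError
if errors.As(err, &fe) {
	// every provider failed; fe.Attempts records each try
}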
func (*FallbackProvider) CircuitBreaker ¶ added in v0.11.0
func (fp *FallbackProvider) CircuitBreaker(providerName string) *CircuitBreaker
CircuitBreaker returns the circuit breaker for a provider, or nil if not configured
func (*FallbackProvider) Close ¶ added in v0.11.0
func (fp *FallbackProvider) Close() error
Close closes all providers
func (*FallbackProvider) CreateChatCompletion ¶ added in v0.11.0
func (fp *FallbackProvider) CreateChatCompletion(
	ctx context.Context,
	req *provider.ChatCompletionRequest,
) (*provider.ChatCompletionResponse, error)
CreateChatCompletion tries the primary provider first, then fallbacks on retryable errors.
func (*FallbackProvider) CreateChatCompletionStream ¶ added in v0.11.0
func (fp *FallbackProvider) CreateChatCompletionStream(
	ctx context.Context,
	req *provider.ChatCompletionRequest,
) (provider.ChatCompletionStream, error)
CreateChatCompletionStream tries the primary provider first, then fallbacks on retryable errors.
func (*FallbackProvider) FallbackProviders ¶ added in v0.11.0
func (fp *FallbackProvider) FallbackProviders() []provider.Provider
FallbackProviders returns the fallback providers
func (*FallbackProvider) Name ¶ added in v0.11.0
func (fp *FallbackProvider) Name() string
Name returns a composite name indicating fallback configuration
func (*FallbackProvider) PrimaryProvider ¶ added in v0.11.0
func (fp *FallbackProvider) PrimaryProvider() provider.Provider
PrimaryProvider returns the primary provider
type FallbackProviderConfig ¶ added in v0.11.0
type FallbackProviderConfig struct {
// CircuitBreakerConfig configures circuit breaker behavior.
// If nil, circuit breaker is disabled.
CircuitBreakerConfig *CircuitBreakerConfig
// Logger for logging fallback events
Logger *slog.Logger
}
FallbackProviderConfig configures the fallback provider behavior
type LLMCallInfo ¶
type LLMCallInfo struct {
CallID string // Unique identifier for correlating BeforeRequest/AfterResponse
ProviderName string // e.g., "openai", "anthropic"
StartTime time.Time // When the call started
}
LLMCallInfo provides metadata about the LLM call for observability
type MemoryConfig ¶
type MemoryConfig struct {
// MaxMessages limits the number of messages to keep in memory per session
MaxMessages int
// TTL sets the time-to-live for stored conversations (0 for no expiration)
TTL time.Duration
// KeyPrefix allows customizing the key prefix for stored conversations
KeyPrefix string
}
MemoryConfig holds configuration for conversation memory
func DefaultMemoryConfig ¶
func DefaultMemoryConfig() MemoryConfig
DefaultMemoryConfig returns sensible defaults for memory configuration
type MemoryManager ¶
type MemoryManager struct {
// contains filtered or unexported fields
}
MemoryManager handles conversation persistence using KVS
func NewMemoryManager ¶
func NewMemoryManager(kvsClient kvs.Client, config MemoryConfig) *MemoryManager
NewMemoryManager creates a new memory manager with the given KVS client and config
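A sketch of direct MemoryManager use (store is a hypothetical kvs.Client; ChatClient offers the same operations when Memory is configured):

mem := omnillm.NewMemoryManager(store, omnillm.DefaultMemoryConfig())
if err := mem.CreateConversationWithSystemMessage(ctx, "session-123", "You are helpful."); err != nil {
	return err
}
if err := mem.AppendMessage(ctx, "session-123", omnillm.Message{Role: omnillm.RoleUser, Content: "Hi"}); err != nil {
	return err
}
msgs, err := mem.GetMessages(ctx, "session-123") // system message plus appended ones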
func (*MemoryManager) AppendMessage ¶
func (m *MemoryManager) AppendMessage(ctx context.Context, sessionID string, message Message) error
AppendMessage adds a message to the conversation and saves it
func (*MemoryManager) AppendMessages ¶
func (m *MemoryManager) AppendMessages(ctx context.Context, sessionID string, messages []Message) error
AppendMessages adds multiple messages to the conversation and saves it
func (*MemoryManager) CreateConversationWithSystemMessage ¶
func (m *MemoryManager) CreateConversationWithSystemMessage(ctx context.Context, sessionID, systemMessage string) error
CreateConversationWithSystemMessage creates a new conversation with a system message
func (*MemoryManager) DeleteConversation ¶
func (m *MemoryManager) DeleteConversation(ctx context.Context, sessionID string) error
DeleteConversation removes a conversation from memory
func (*MemoryManager) GetMessages ¶
func (m *MemoryManager) GetMessages(ctx context.Context, sessionID string) ([]Message, error)
GetMessages returns just the messages from a conversation
func (*MemoryManager) LoadConversation ¶
func (m *MemoryManager) LoadConversation(ctx context.Context, sessionID string) (*ConversationMemory, error)
LoadConversation retrieves a conversation from memory
func (*MemoryManager) SaveConversation ¶
func (m *MemoryManager) SaveConversation(ctx context.Context, conversation *ConversationMemory) error
SaveConversation stores a conversation in memory
func (*MemoryManager) SetMetadata ¶
func (m *MemoryManager) SetMetadata(ctx context.Context, sessionID string, metadata map[string]any) error
SetMetadata sets metadata for a conversation
type ModelInfo ¶
type ModelInfo struct {
ID string `json:"id"`
Provider ProviderName `json:"provider"`
Name string `json:"name"`
MaxTokens int `json:"max_tokens"`
}
ModelInfo represents information about a model
func GetModelInfo ¶
GetModelInfo returns model information
type ObservabilityHook ¶
type ObservabilityHook interface {
// BeforeRequest is called before each LLM call.
// Returns a new context for trace/span propagation.
// The hook should not modify the request.
BeforeRequest(ctx context.Context, info LLMCallInfo, req *provider.ChatCompletionRequest) context.Context
// AfterResponse is called after each LLM call completes.
// This is called for both successful and failed requests.
AfterResponse(ctx context.Context, info LLMCallInfo, req *provider.ChatCompletionRequest, resp *provider.ChatCompletionResponse, err error)
// WrapStream wraps a stream for observability.
// This allows the hook to observe streaming responses.
// The returned stream must implement the same interface as the input.
//
// Note: For streaming, AfterResponse is only called if stream creation fails.
// To track streaming completion timing and content, the wrapper returned here
// should handle Close() or detect EOF in Recv() to finalize metrics/traces.
WrapStream(ctx context.Context, info LLMCallInfo, req *provider.ChatCompletionRequest, stream provider.ChatCompletionStream) provider.ChatCompletionStream
}
ObservabilityHook allows external packages to observe LLM calls. Implementations can use this to add tracing, logging, or metrics without modifying the core OmniLLM library.
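A minimal logging hook as a sketch (it assumes the provider package import path github.com/agentplexus/omnillm/provider and passes streams through unchanged):

type logHook struct{ log *slog.Logger }

func (h logHook) BeforeRequest(ctx context.Context, info omnillm.LLMCallInfo, req *provider.ChatCompletionRequest) context.Context {
	h.log.Info("llm call start", "call_id", info.CallID, "provider", info.ProviderName)
	return ctx
}

func (h logHook) AfterResponse(ctx context.Context, info omnillm.LLMCallInfo, req *provider.ChatCompletionRequest, resp *provider.ChatCompletionResponse, err error) {
	h.log.Info("llm call done", "call_id", info.CallID,
		"duration", time.Since(info.StartTime), "err", err)
}

func (h logHook) WrapStream(ctx context.Context, info omnillm.LLMCallInfo, req *provider.ChatCompletionRequest, stream provider.ChatCompletionStream) provider.ChatCompletionStream {
	return stream // pass-through; a real hook would wrap Recv/Close to finalize spans
}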
type ProviderConfig ¶ added in v0.11.0
type ProviderConfig struct {
// Provider is the provider type (e.g., ProviderNameOpenAI).
// Ignored if CustomProvider is set.
Provider ProviderName
// APIKey is the API key for the provider
APIKey string
// BaseURL is an optional custom base URL
BaseURL string
// Region is for providers that require a region (e.g., AWS Bedrock)
Region string
// Timeout sets the HTTP client timeout for this provider
Timeout time.Duration
// HTTPClient is an optional custom HTTP client
HTTPClient *http.Client
// Extra holds provider-specific configuration
Extra map[string]any
// CustomProvider allows injecting a custom provider implementation.
// When set, Provider, APIKey, BaseURL, etc. are ignored.
CustomProvider provider.Provider
}
ProviderConfig holds configuration for a single provider instance. Used in the Providers slice where index 0 is primary and 1+ are fallbacks.
type ProviderName ¶
type ProviderName string
ProviderName represents the different LLM provider names
const (
	ProviderNameOpenAI    ProviderName = "openai"
	ProviderNameAnthropic ProviderName = "anthropic"
	ProviderNameBedrock   ProviderName = "bedrock"
	ProviderNameOllama    ProviderName = "ollama"
	ProviderNameGemini    ProviderName = "gemini"
	ProviderNameXAI       ProviderName = "xai"
)
type TokenEstimator ¶ added in v0.11.0
type TokenEstimator interface {
// EstimateTokens estimates the token count for a set of messages.
// The estimate may not be exact but should be reasonably close.
EstimateTokens(model string, messages []provider.Message) (int, error)
// GetContextWindow returns the maximum context window size for a model.
// Returns 0 if the model is unknown.
GetContextWindow(model string) int
}
TokenEstimator estimates token counts for messages before sending to the API. This is useful for validating requests won't exceed model limits.
func NewTokenEstimator ¶ added in v0.11.0
func NewTokenEstimator(config TokenEstimatorConfig) TokenEstimator
NewTokenEstimator creates a new token estimator with the given configuration. If config has zero values, defaults are used for those fields.
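For example, a more conservative estimator with an override for a custom model (the model ID "my-fine-tune" is hypothetical; msgs is assumed in scope):

est := omnillm.NewTokenEstimator(omnillm.TokenEstimatorConfig{
	CharactersPerToken:   3.5, // tighter than the 4.0 default
	CustomContextWindows: map[string]int{"my-fine-tune": 32000},
})
n, err := est.EstimateTokens(omnillm.ModelGPT4o, msgs)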
type TokenEstimatorConfig ¶ added in v0.11.0
type TokenEstimatorConfig struct {
// CharactersPerToken is the average number of characters per token.
// Default: 4.0 (reasonable for English text)
// Lower values (e.g., 3.0) give more conservative estimates.
CharactersPerToken float64
// CustomContextWindows allows overriding context window sizes for specific models.
// Keys should be model IDs (e.g., "gpt-4o", "claude-3-opus").
CustomContextWindows map[string]int
// TokenOverheadPerMessage is extra tokens added per message for formatting.
// Default: 4 (accounts for role, separators, etc.)
TokenOverheadPerMessage int
}
TokenEstimatorConfig configures token estimation behavior
func DefaultTokenEstimatorConfig ¶ added in v0.11.0
func DefaultTokenEstimatorConfig() TokenEstimatorConfig
DefaultTokenEstimatorConfig returns a TokenEstimatorConfig with sensible defaults
type TokenLimitError ¶ added in v0.11.0
type TokenLimitError struct {
// EstimatedTokens is the estimated prompt token count
EstimatedTokens int
// ContextWindow is the model's maximum context window
ContextWindow int
// AvailableTokens is how many tokens are available (may be negative)
AvailableTokens int
// Model is the model ID
Model string
}
TokenLimitError is returned when a request exceeds token limits
func (*TokenLimitError) Error ¶ added in v0.11.0
func (e *TokenLimitError) Error() string
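When ValidateTokens is enabled on the client, rejected requests can be inspected with errors.As; a sketch:

var tle *omnillm.TokenLimitError
if errors.As(err, &tle) {
	log.Printf("prompt for %s needs ~%d tokens but the window is %d",
		tle.Model, tle.EstimatedTokens, tle.ContextWindow)
}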
type TokenValidation ¶ added in v0.11.0
type TokenValidation struct {
// EstimatedTokens is the estimated prompt token count
EstimatedTokens int
// ContextWindow is the model's maximum context window
ContextWindow int
// MaxCompletionTokens is the requested max completion tokens
MaxCompletionTokens int
// AvailableTokens is how many tokens are available for completion
// (ContextWindow - EstimatedTokens)
AvailableTokens int
// ExceedsLimit is true if the prompt exceeds the context window
ExceedsLimit bool
// ExceedsWithCompletion is true if prompt + max_tokens exceeds context
ExceedsWithCompletion bool
}
TokenValidation contains the result of token validation
func ValidateTokens ¶ added in v0.11.0
func ValidateTokens(
	estimator TokenEstimator,
	model string,
	messages []provider.Message,
	maxCompletionTokens int,
) (*TokenValidation, error)
ValidateTokens checks if the request fits within model limits. Returns validation details including whether limits are exceeded.
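A sketch of checking headroom before requesting 1024 completion tokens (est and msgs are assumed in scope):

v, err := omnillm.ValidateTokens(est, omnillm.ModelGPT4o, msgs, 1024)
if err != nil {
	return err
}
if v.ExceedsWithCompletion {
	// prompt + 1024 completion tokens will not fit;
	// v.AvailableTokens shows how much completion headroom remains
}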
type ToolFunction ¶
type ToolFunction = provider.ToolFunction
Source Files ¶
Directories ¶
| Path | Synopsis |
|---|---|
| examples | |
| examples/anthropic_streaming | command |
| examples/architecture_demo | command |
| examples/basic | command |
| examples/conversation | command |
| examples/custom_provider | command |
| examples/gemini | command |
| examples/memory_demo | command |
| examples/ollama | command |
| examples/ollama_streaming | command |
| examples/providers_demo | command |
| examples/streaming | command |
| examples/xai | command |
| models | Package models provides a comprehensive catalog of LLM model identifiers and documentation references for all supported providers. |
| provider | Package provider defines the core interfaces that external LLM providers must implement. |
| providers | |
| providers/anthropic | Package anthropic provides Anthropic provider adapter for the OmniLLM unified interface |
| providers/gemini | Package gemini provides Google Gemini provider adapter for the OmniLLM unified interface |
| providers/ollama | Package ollama provides Ollama provider adapter for the OmniLLM unified interface |
| providers/openai | Package openai provides OpenAI provider adapter for the OmniLLM unified interface |
| providers/xai | Package xai provides X.AI Grok provider adapter for the OmniLLM unified interface |
| testing | Package testing provides mock implementations for testing |