speech

package
v0.0.0-...-758c32e Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 15, 2025 | License: MIT | Imports: 4 | Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Detector

// Detector is the speech detector returned by NewDetector for a given
// DetectorConfig. Its Detect method takes PCM samples and returns the
// speech Segments it reports for them.
type Detector struct {
	// contains filtered or unexported fields
}

func NewDetector

func NewDetector(cfg DetectorConfig) (*Detector, error)

func (*Detector) Destroy

func (sd *Detector) Destroy() error

func (*Detector) Detect

func (sd *Detector) Detect(pcm []float32) ([]Segment, error)

func (*Detector) Infer

func (sd *Detector) Infer(pcm []float32, state []float32) (float32, error)

func (*Detector) Reset

func (sd *Detector) Reset() error

func (*Detector) SetThreshold

func (sd *Detector) SetThreshold(value float32)

type DetectorConfig

// DetectorConfig holds the settings accepted by NewDetector.
type DetectorConfig struct {
	// ModelPath is the path to the ONNX Silero VAD model file to load.
	ModelPath string
	// SampleRate is the sampling rate of the input audio samples.
	// Supported values are 8000 and 16000.
	SampleRate int
	// Threshold is the probability threshold above which speech is detected.
	// A good default is 0.5.
	Threshold float32
	// MinSilenceDurationMs is the duration of silence (in milliseconds, per
	// the Ms suffix) to wait for after a speech segment before separating it
	// from the next one.
	MinSilenceDurationMs int
	// SpeechPadMs is the padding (in milliseconds, per the Ms suffix) added
	// to speech segments to avoid aggressive cutting.
	SpeechPadMs int
	// LogLevel is the log level for the ONNX environment. By default it is
	// set to LogLevelWarn.
	LogLevel LogLevel
}

func (DetectorConfig) IsValid

func (c DetectorConfig) IsValid() error

type LogLevel

// LogLevel is a log level for the ONNX environment; it is the type of
// DetectorConfig.LogLevel.
type LogLevel int

// The named log levels are numbered from 1 in declaration order, so the zero
// value of LogLevel does not correspond to any of these constants.
//
// NOTE(review): LevelVerbose lacks the "Log" prefix used by the other four
// constants. Renaming it would break existing callers, so the inconsistency
// is only flagged here.
const (
	LevelVerbose LogLevel = iota + 1
	LogLevelInfo
	LogLevelWarn
	LogLevelError
	LogLevelFatal
)

func (LogLevel) OrtLoggingLevel

func (l LogLevel) OrtLoggingLevel() C.OrtLoggingLevel

type Segment

// Segment contains timing information of a speech segment.
type Segment struct {
	// SpeechStartAt is the relative timestamp, in seconds, of when a speech
	// segment begins.
	// NOTE(review): presumably relative to the start of the audio given to
	// the detector — confirm against the implementation.
	SpeechStartAt float64
	// SpeechEndAt is the relative timestamp, in seconds, of when a speech
	// segment ends.
	SpeechEndAt float64
}

Segment contains timing information of a speech segment.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL