tuple

package

v0.1.0-RC1 Latest Latest Go to latest Published: Dec 31, 2025 License: Apache-2.0 Imports: 14 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/apache/datasketches-go

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
Variables
func IsDissimilar[S Summary](actual, expected Sketch[S], policy Policy[S], threshold float64, seed uint64) (bool, error)
func IsExactlyEqual[S Summary](a, b Sketch[S], policy Policy[S], seed uint64) (bool, error)
func IsSimilar[S Summary](actual, expected Sketch[S], policy Policy[S], threshold float64, seed uint64) (bool, error)
type CompactSketch
- func ANotB[S Summary](a, b Sketch[S], seed uint64, ordered bool) (*CompactSketch[S], error)
- func Decode[S Summary](b []byte, seed uint64, read SummaryReader[S]) (*CompactSketch[S], error)
- func NewCompactSketch[S Summary](other Sketch[S], ordered bool) (*CompactSketch[S], error)
- func NewCompactSketchFromThetaSketch[S Summary](sketch theta.Sketch, summary S, ordered bool) (*CompactSketch[S], error)
- func TupleANotThetaB[S Summary](a Sketch[S], b theta.Sketch, seed uint64, ordered bool) (*CompactSketch[S], error)
- func (s *CompactSketch[S]) All() iter.Seq2[uint64, S]
- func (s *CompactSketch[S]) Estimate() float64
- func (s *CompactSketch[S]) Filter(predicate func(S) bool) (*CompactSketch[S], error)
- func (s *CompactSketch[S]) IsEmpty() bool
- func (s *CompactSketch[S]) IsEstimationMode() bool
- func (s *CompactSketch[S]) IsOrdered() bool
- func (s *CompactSketch[S]) LowerBound(numStdDevs uint8) (float64, error)
- func (s *CompactSketch[S]) LowerBoundFromSubset(numStdDevs uint8, numSubsetEntries uint32) (float64, error)
- func (s *CompactSketch[S]) NumRetained() uint32
- func (s *CompactSketch[S]) SeedHash() (uint16, error)
- func (s *CompactSketch[S]) String(printItems bool) string
- func (s *CompactSketch[S]) Theta() float64
- func (s *CompactSketch[S]) Theta64() uint64
- func (s *CompactSketch[S]) UpperBound(numStdDevs uint8) (float64, error)
- func (s *CompactSketch[S]) UpperBoundFromSubset(numStdDevs uint8, numSubsetEntries uint32) (float64, error)
type Decoder
- func NewDecoder[S Summary](seed uint64, read SummaryReader[S]) Decoder[S]
- func (dec *Decoder[S]) Decode(r io.Reader) (*CompactSketch[S], error)
type Encoder
- func NewEncoder[S Summary](w io.Writer, write SummaryWriter[S]) Encoder[S]
- func (enc *Encoder[S]) Encode(sketch *CompactSketch[S]) error
type Intersection
- func NewIntersection[S Summary](policy Policy[S], opts ...IntersectionOptionFunc) *Intersection[S]
- func (i *Intersection[S]) HasResult() bool
- func (i *Intersection[S]) OrderedResult() (*CompactSketch[S], error)
- func (i *Intersection[S]) Policy() Policy[S]
- func (i *Intersection[S]) Result(ordered bool) (*CompactSketch[S], error)
- func (i *Intersection[S]) Update(sketch Sketch[S]) error
type IntersectionOptionFunc
- func WithIntersectionSeed(seed uint64) IntersectionOptionFunc
type JaccardSimilarityResult
- func Jaccard[S Summary](a, b Sketch[S], policy Policy[S], seed uint64) (JaccardSimilarityResult, error)
type Policy
type Sketch
type Summary
type SummaryReader
type SummaryWriter
type Union
- func NewUnion[S Summary](policy Policy[S], opts ...UnionOptionFunc) (*Union[S], error)
- func (u *Union[S]) OrderedResult() (*CompactSketch[S], error)
- func (u *Union[S]) Policy() Policy[S]
- func (u *Union[S]) Reset()
- func (u *Union[S]) Result(ordered bool) (*CompactSketch[S], error)
- func (u *Union[S]) Update(sketch Sketch[S]) error
type UnionOptionFunc
- func WithUnionLgK(lgK uint8) UnionOptionFunc
- func WithUnionResizeFactor(rf theta.ResizeFactor) UnionOptionFunc
- func WithUnionSeed(seed uint64) UnionOptionFunc
- func WithUnionSketchP(p float32) UnionOptionFunc
type UpdatableSummary
type UpdateSketch
- func NewUpdateSketch[S UpdatableSummary[V], V any](newSummaryFunc func() S, opts ...UpdateSketchOptionFunc) (*UpdateSketch[S, V], error)
- func (s *UpdateSketch[S, V]) All() iter.Seq2[uint64, S]
- func (s *UpdateSketch[S, V]) Compact(ordered bool) (*CompactSketch[S], error)
- func (s *UpdateSketch[S, V]) Estimate() float64
- func (s *UpdateSketch[S, V]) Filter(predicate func(S) bool) (*CompactSketch[S], error)
- func (s *UpdateSketch[S, V]) IsEmpty() bool
- func (s *UpdateSketch[S, V]) IsEstimationMode() bool
- func (s *UpdateSketch[S, V]) IsOrdered() bool
- func (s *UpdateSketch[S, V]) LgK() uint8
- func (s *UpdateSketch[S, V]) LowerBound(numStdDevs uint8) (float64, error)
- func (s *UpdateSketch[S, V]) LowerBoundFromSubset(numStdDevs uint8, numSubsetEntries uint32) (float64, error)
- func (s *UpdateSketch[S, V]) NumRetained() uint32
- func (s *UpdateSketch[S, V]) Reset()
- func (s *UpdateSketch[S, V]) ResizeFactor() theta.ResizeFactor
- func (s *UpdateSketch[S, V]) SeedHash() (uint16, error)
- func (s *UpdateSketch[S, V]) String(shouldPrintItems bool) string
- func (s *UpdateSketch[S, V]) Theta() float64
- func (s *UpdateSketch[S, V]) Theta64() uint64
- func (s *UpdateSketch[S, V]) Trim()
- func (s *UpdateSketch[S, V]) UpdateBytes(data []byte, value V) error
- func (s *UpdateSketch[S, V]) UpdateFloat32(key float32, value V) error
- func (s *UpdateSketch[S, V]) UpdateFloat64(key float64, value V) error
- func (s *UpdateSketch[S, V]) UpdateInt16(key int16, value V) error
- func (s *UpdateSketch[S, V]) UpdateInt32(key int32, value V) error
- func (s *UpdateSketch[S, V]) UpdateInt64(key int64, value V) error
- func (s *UpdateSketch[S, V]) UpdateInt8(key int8, value V) error
- func (s *UpdateSketch[S, V]) UpdateString(key string, value V) error
- func (s *UpdateSketch[S, V]) UpdateUint16(key uint16, value V) error
- func (s *UpdateSketch[S, V]) UpdateUint32(key uint32, value V) error
- func (s *UpdateSketch[S, V]) UpdateUint64(key uint64, value V) error
- func (s *UpdateSketch[S, V]) UpdateUint8(key uint8, value V) error
- func (s *UpdateSketch[S, V]) UpperBound(numStdDevs uint8) (float64, error)
- func (s *UpdateSketch[S, V]) UpperBoundFromSubset(numStdDevs uint8, numSubsetEntries uint32) (float64, error)
type UpdateSketchOptionFunc
- func WithUpdateSketchLgK(lgK uint8) UpdateSketchOptionFunc
- func WithUpdateSketchP(p float32) UpdateSketchOptionFunc
- func WithUpdateSketchResizeFactor(rf theta.ResizeFactor) UpdateSketchOptionFunc
- func WithUpdateSketchSeed(seed uint64) UpdateSketchOptionFunc

Constants ¶

View Source

const (
	SerialVersionLegacy = uint8(1)
	SerialVersion       = uint8(3)
	SketchFamily        = uint8(9)
	SketchType          = uint8(1)
	SketchTypeLegacy    = uint8(5)
)

Variables ¶

View Source

var (
	ErrKeyNotFound                = errors.New("key not found")
	ErrKeyNotFoundAndNoEmptySlots = errors.New("key not found and no empty slots")
	// ErrZeroHashValue is used to indicate that the hash value is zero.
	// Zero is a reserved value for empty slots in the hash table.
	ErrZeroHashValue    = errors.New("zero hash value")
	ErrHashExceedsTheta = errors.New("hash exceeds theta")
)

View Source

var (
	ErrUpdateEmptyString = errors.New("cannot update empty string")
)

Functions ¶

func IsDissimilar ¶

func IsDissimilar[S Summary](actual, expected Sketch[S], policy Policy[S], threshold float64, seed uint64) (bool, error)

IsDissimilar tests dissimilarity of an actual Sketch against an expected Sketch. It computes the upper bound of the Jaccard index J_UB of the actual and expected sketches. If J_UB <= threshold, then the sketches are considered to be dissimilar with a confidence of 97.7%. The actual parameter is the sketch to be tested, and expected is the reference sketch that is considered to be correct. The threshold should be a real value between zero and one. The seed parameter should match the seed used to create the sketches. It returns true if J_UB <= threshold, that is, if the two sketches are dissimilar with respect to the given threshold with at least 97.7% confidence.

func IsExactlyEqual ¶

func IsExactlyEqual[S Summary](a, b Sketch[S], policy Policy[S], seed uint64) (bool, error)

IsExactlyEqual returns true if the two given sketches are equivalent. The seed parameter should match the seed used to create sketches a and b.

func IsSimilar ¶

func IsSimilar[S Summary](actual, expected Sketch[S], policy Policy[S], threshold float64, seed uint64) (bool, error)

IsSimilar tests similarity of an actual Sketch against an expected Sketch. It computes the lower bound of the Jaccard index J_LB of the actual and expected sketches. If J_LB >= threshold, then the sketches are considered to be similar with a confidence of 97.7%. The actual parameter is the sketch to be tested, and expected is the reference sketch that is considered to be correct. The threshold should be a real value between zero and one. The seed parameter should match the seed used to create the sketches. It returns true if J_LB >= threshold, that is, if the similarity of the two sketches is at least the given threshold with at least 97.7% confidence.

Types ¶

type CompactSketch ¶

type CompactSketch[S Summary] struct {
	// contains filtered or unexported fields
}

CompactSketch is the immutable, serializable form of a tuple sketch.

func ANotB ¶

func ANotB[S Summary](a, b Sketch[S], seed uint64, ordered bool) (*CompactSketch[S], error)

ANotB computes the set difference of two Tuple sketches.

func Decode ¶

func Decode[S Summary](b []byte, seed uint64, read SummaryReader[S]) (*CompactSketch[S], error)

Decode reconstructs a CompactSketch from a byte slice using a specified seed and read function.

func NewCompactSketch ¶

func NewCompactSketch[S Summary](other Sketch[S], ordered bool) (*CompactSketch[S], error)

NewCompactSketch creates a new CompactSketch from any sketch implementing the Sketch interface.

func NewCompactSketchFromThetaSketch ¶

func NewCompactSketchFromThetaSketch[S Summary](
	sketch theta.Sketch, summary S, ordered bool,
) (*CompactSketch[S], error)

NewCompactSketchFromThetaSketch creates a new CompactSketch from a given theta.Sketch and a summary.

func TupleANotThetaB ¶

func TupleANotThetaB[S Summary](a Sketch[S], b theta.Sketch, seed uint64, ordered bool) (*CompactSketch[S], error)

TupleANotThetaB computes the set difference of a Tuple sketch a and a Theta sketch b, i.e. the entries of a that are not in b.

func (*CompactSketch[S]) All ¶

func (s *CompactSketch[S]) All() iter.Seq2[uint64, S]

All returns an iterator over all hash-summary pairs in the sketch.

func (*CompactSketch[S]) Estimate ¶

func (s *CompactSketch[S]) Estimate() float64

Estimate returns the estimated distinct count of the input stream.

func (*CompactSketch[S]) Filter ¶

func (s *CompactSketch[S]) Filter(predicate func(S) bool) (*CompactSketch[S], error)

Filter produces a new CompactSketch by applying a predicate to each entry. The predicate should return true for entries to keep.

func (*CompactSketch[S]) IsEmpty ¶

func (s *CompactSketch[S]) IsEmpty() bool

IsEmpty reports whether this sketch represents an empty set. Note: this is not the same as having no retained hashes.

func (*CompactSketch[S]) IsEstimationMode ¶

func (s *CompactSketch[S]) IsEstimationMode() bool

IsEstimationMode reports whether the sketch is in estimation mode, as opposed to exact mode.

func (*CompactSketch[S]) IsOrdered ¶

func (s *CompactSketch[S]) IsOrdered() bool

IsOrdered reports whether retained hashes are sorted by hash value.

func (*CompactSketch[S]) LowerBound ¶

func (s *CompactSketch[S]) LowerBound(numStdDevs uint8) (float64, error)

LowerBound returns the approximate lower error bound for the given number of standard deviations. numStdDevs should be 1, 2, or 3 for approximately 68%, 95%, or 99.7% confidence intervals.

func (*CompactSketch[S]) LowerBoundFromSubset ¶

func (s *CompactSketch[S]) LowerBoundFromSubset(numStdDevs uint8, numSubsetEntries uint32) (float64, error)

LowerBoundFromSubset returns the approximate lower error bound for the given number of standard deviations over a subset of retained hashes. numStdDevs specifies the confidence level (1, 2, or 3) corresponding to approximately 68%, 95%, or 99.7% confidence intervals. numSubsetEntries specifies the number of items, from {0, 1, ..., NumRetained()}, over which to estimate the bound.

func (*CompactSketch[S]) NumRetained ¶

func (s *CompactSketch[S]) NumRetained() uint32

NumRetained returns the number of hashes retained in the sketch.

func (*CompactSketch[S]) SeedHash ¶

func (s *CompactSketch[S]) SeedHash() (uint16, error)

SeedHash returns the hash of the seed used to hash the input.

func (*CompactSketch[S]) String ¶

func (s *CompactSketch[S]) String(printItems bool) string

String returns a human-readable summary of this sketch. If printItems is true, the output includes all retained hashes.

func (*CompactSketch[S]) Theta ¶

func (s *CompactSketch[S]) Theta() float64

Theta returns theta as a fraction from 0 to 1, representing the effective sampling rate.

func (*CompactSketch[S]) Theta64 ¶

func (s *CompactSketch[S]) Theta64() uint64

Theta64 returns theta as a positive integer between 0 and math.MaxUint64.

func (*CompactSketch[S]) UpperBound ¶

func (s *CompactSketch[S]) UpperBound(numStdDevs uint8) (float64, error)

UpperBound returns the approximate upper error bound for the given number of standard deviations. numStdDevs should be 1, 2, or 3 for approximately 68%, 95%, or 99.7% confidence intervals.

func (*CompactSketch[S]) UpperBoundFromSubset ¶

func (s *CompactSketch[S]) UpperBoundFromSubset(numStdDevs uint8, numSubsetEntries uint32) (float64, error)

UpperBoundFromSubset returns the approximate upper error bound for the given number of standard deviations over a subset of retained hashes. numStdDevs specifies the confidence level (1, 2, or 3) corresponding to approximately 68%, 95%, or 99.7% confidence intervals. numSubsetEntries specifies the number of items, from {0, 1, ..., NumRetained()}, over which to estimate the bound.

type Decoder ¶

type Decoder[S Summary] struct {
	// contains filtered or unexported fields
}

Decoder decodes a compact sketch from the given reader.

func NewDecoder ¶

func NewDecoder[S Summary](seed uint64, read SummaryReader[S]) Decoder[S]

NewDecoder creates a new decoder.

func (*Decoder[S]) Decode ¶

func (dec *Decoder[S]) Decode(r io.Reader) (*CompactSketch[S], error)

Decode decodes a compact sketch from the given reader.

type Encoder ¶

type Encoder[S Summary] struct {
	// contains filtered or unexported fields
}

Encoder encodes a compact tuple sketch to bytes.

func NewEncoder ¶

func NewEncoder[S Summary](w io.Writer, write SummaryWriter[S]) Encoder[S]

NewEncoder creates a new encoder.

func (*Encoder[S]) Encode ¶

func (enc *Encoder[S]) Encode(sketch *CompactSketch[S]) error

Encode encodes a compact tuple sketch to bytes.

type Intersection ¶

type Intersection[S Summary] struct {
	// contains filtered or unexported fields
}

Intersection computes the intersection of sketches.

func NewIntersection ¶

func NewIntersection[S Summary](policy Policy[S], opts ...IntersectionOptionFunc) *Intersection[S]

NewIntersection creates a new intersection.

func (*Intersection[S]) HasResult ¶

func (i *Intersection[S]) HasResult() bool

HasResult returns true if the state of the intersection is defined.

func (*Intersection[S]) OrderedResult ¶

func (i *Intersection[S]) OrderedResult() (*CompactSketch[S], error)

OrderedResult produces a copy of the current state of the intersection as an ordered compact sketch.

func (*Intersection[S]) Policy ¶

func (i *Intersection[S]) Policy() Policy[S]

Policy returns the policy used for processing matched summaries during intersection.

func (*Intersection[S]) Result ¶

func (i *Intersection[S]) Result(ordered bool) (*CompactSketch[S], error)

Result produces a copy of the current state of the intersection as a compact sketch. The ordered parameter selects whether the resulting sketch is ordered.

func (*Intersection[S]) Update ¶

func (i *Intersection[S]) Update(sketch Sketch[S]) error

Update updates the intersection with a given sketch.

type IntersectionOptionFunc ¶

type IntersectionOptionFunc func(*intersectionOptions)

func WithIntersectionSeed ¶

func WithIntersectionSeed(seed uint64) IntersectionOptionFunc

WithIntersectionSeed sets the seed for the hash function.

type JaccardSimilarityResult ¶

type JaccardSimilarityResult struct {
	LowerBound float64
	Estimate   float64
	UpperBound float64
}

JaccardSimilarityResult represents the result of a Jaccard similarity computation, consisting of a lower bound, an estimate, and an upper bound.

func Jaccard ¶

func Jaccard[S Summary](a, b Sketch[S], policy Policy[S], seed uint64) (JaccardSimilarityResult, error)

Jaccard computes the Jaccard similarity index with upper and lower bounds. The Jaccard similarity index J(A,B) = |A ∩ B| / |A ∪ B| is used to measure how similar the two sketches are to each other. If J = 1.0, the sketches are considered equal. If J = 0, the two sketches are disjoint. A Jaccard of 0.95 means the overlap between the two sets is 95% of the union of the two sets.

The seed parameter should match the seed used to create sketches a and b. The returned JaccardSimilarityResult contains the LowerBound, Estimate, and UpperBound of the Jaccard index. The upper and lower bounds are for a confidence interval of 95.4%, or +/- 2 standard deviations.

Note: For very large pairs of sketches, where the configured nominal entries of the sketches are 2^25 or 2^26, this method may produce unpredictable results.

type Policy ¶

type Policy[S Summary] interface {
	// Apply is called when a matching summary is found.
	// internalSummary: the summary already held internally by the set operation
	// incomingEntry: the matching summary from the incoming sketch
	Apply(internalSummary S, incomingEntry S)
}

Policy defines a policy for processing matched summaries.

type Sketch ¶

type Sketch[S Summary] interface {
	// IsEmpty reports whether this sketch represents an empty set.
	// Note: this is not the same as having no retained hashes.
	IsEmpty() bool

	// Estimate returns the estimated distinct count of the input stream.
	Estimate() float64

	// LowerBoundFromSubset returns the approximate lower error bound for
	// the given number of standard deviations over a subset of retained hashes.
	// numStdDevs specifies the confidence level (1, 2, or 3) corresponding to
	// approximately 68%, 95%, or 99.7% confidence intervals.
	// numSubsetEntries specifies the number of items, from {0, 1, ..., NumRetained()},
	// over which to estimate the bound.
	LowerBoundFromSubset(numStdDevs uint8, numSubsetEntries uint32) (float64, error)

	// LowerBound returns the approximate lower error bound for the given
	// number of standard deviations. numStdDevs should be 1, 2, or 3 for
	// approximately 68%, 95%, or 99.7% confidence intervals.
	LowerBound(numStdDevs uint8) (float64, error)

	// UpperBoundFromSubset returns the approximate upper error bound for
	// the given number of standard deviations over a subset of retained hashes.
	// numStdDevs specifies the confidence level (1, 2, or 3) corresponding to
	// approximately 68%, 95%, or 99.7% confidence intervals.
	// numSubsetEntries specifies the number of items, from {0, 1, ..., NumRetained()},
	// over which to estimate the bound.
	UpperBoundFromSubset(numStdDevs uint8, numSubsetEntries uint32) (float64, error)

	// UpperBound returns the approximate upper error bound for the given
	// number of standard deviations. numStdDevs should be 1, 2, or 3 for
	// approximately 68%, 95%, or 99.7% confidence intervals.
	UpperBound(numStdDevs uint8) (float64, error)

	// IsEstimationMode reports whether the sketch is in estimation mode,
	// as opposed to exact mode.
	IsEstimationMode() bool

	// Theta returns theta as a fraction from 0 to 1, representing the
	// effective sampling rate.
	Theta() float64

	// Theta64 returns theta as a positive integer between 0 and math.MaxUint64.
	Theta64() uint64

	// NumRetained returns the number of hashes retained in the sketch.
	NumRetained() uint32

	// SeedHash returns the hash of the seed used to hash the input.
	SeedHash() (uint16, error)

	// IsOrdered reports whether retained hashes are sorted by hash value.
	IsOrdered() bool

	// String returns a human-readable summary of this sketch.
	// If shouldPrintItems is true, the output includes all retained hashes.
	String(shouldPrintItems bool) string

	// All returns an iterator over all hash-summary pairs in the sketch.
	All() iter.Seq2[uint64, S]
}

Sketch is the base interface for tuple sketches. It extends Theta sketch to associate arbitrary summaries with each retained key.

type Summary ¶

type Summary interface {
	// Reset clears the content of the summary, restoring it to its initial state.
	Reset()
	// Clone creates and returns a deep copy of the current Summary instance.
	Clone() Summary
}

Summary is the base interface for all summary types used in tuple sketches. A summary holds aggregate data associated with each retained hash key.

type SummaryReader ¶

type SummaryReader[S Summary] func(r io.Reader) (S, error)

SummaryReader reads and returns a summary from the reader. Implementations should read the format written by a corresponding SummaryWriter.

type SummaryWriter ¶

type SummaryWriter[S Summary] func(w io.Writer, s S) error

SummaryWriter writes a summary to the writer. Implementations should write the summary in a format that can be read by a corresponding SummaryReader.

type Union ¶

type Union[S Summary] struct {
	// contains filtered or unexported fields
}

Union computes the union of Tuple sketches.

func NewUnion ¶

func NewUnion[S Summary](policy Policy[S], opts ...UnionOptionFunc) (*Union[S], error)

NewUnion creates a new union with the given options

func (*Union[S]) OrderedResult ¶

func (u *Union[S]) OrderedResult() (*CompactSketch[S], error)

OrderedResult produces a copy of the current state of the Union as an ordered compact sketch

func (*Union[S]) Policy ¶

func (u *Union[S]) Policy() Policy[S]

Policy returns the policy used by this union

func (*Union[S]) Reset ¶

func (u *Union[S]) Reset()

Reset resets the union to the initial empty state

func (*Union[S]) Result ¶

func (u *Union[S]) Result(ordered bool) (*CompactSketch[S], error)

Result produces a copy of the current state of the Union as a compact sketch

func (*Union[S]) Update ¶

func (u *Union[S]) Update(sketch Sketch[S]) error

Update adds a sketch to the union

type UnionOptionFunc ¶

type UnionOptionFunc func(*unionOptions)

func WithUnionLgK ¶

func WithUnionLgK(lgK uint8) UnionOptionFunc

WithUnionLgK sets log2(k), where k is a nominal number of entries in the union

func WithUnionResizeFactor ¶

func WithUnionResizeFactor(rf theta.ResizeFactor) UnionOptionFunc

WithUnionResizeFactor sets a resize factor for the internal hash table (defaults to 8)

func WithUnionSeed ¶

func WithUnionSeed(seed uint64) UnionOptionFunc

WithUnionSeed sets the seed for the hash function. It should be used carefully, and only if needed. Unions produced with different seeds are not compatible and cannot be mixed in set operations.

func WithUnionSketchP ¶

func WithUnionSketchP(p float32) UnionOptionFunc

WithUnionSketchP sets the sampling probability (initial theta). The default is 1, so the union retains all entries until it reaches the limit, at which point it goes into estimation mode and reduces the effective sampling probability (theta) as necessary.

type UpdatableSummary ¶

type UpdatableSummary[V any] interface {
	Summary

	// Update incorporates a new value into the summary, modifying its internal state based on the given input value.
	Update(value V)
}

UpdatableSummary represents a summary that can be updated with values of type V.

type UpdateSketch ¶

type UpdateSketch[S UpdatableSummary[V], V any] struct {
	// contains filtered or unexported fields
}

UpdateSketch builds a Tuple sketch from input data via update methods.

func NewUpdateSketch ¶

func NewUpdateSketch[S UpdatableSummary[V], V any](
	newSummaryFunc func() S, opts ...UpdateSketchOptionFunc,
) (*UpdateSketch[S, V], error)

NewUpdateSketch initializes and returns a new instance of UpdateSketch with the specified parameters.

func (*UpdateSketch[S, V]) All ¶

func (s *UpdateSketch[S, V]) All() iter.Seq2[uint64, S]

All returns an iterator over all hash-summary pairs in the sketch.

func (*UpdateSketch[S, V]) Compact ¶

func (s *UpdateSketch[S, V]) Compact(ordered bool) (*CompactSketch[S], error)

Compact compacts this sketch to a compact sketch.

func (*UpdateSketch[S, V]) Estimate ¶

func (s *UpdateSketch[S, V]) Estimate() float64

Estimate returns the estimated distinct count of the input stream.

func (*UpdateSketch[S, V]) Filter ¶

func (s *UpdateSketch[S, V]) Filter(predicate func(S) bool) (*CompactSketch[S], error)

Filter produces a CompactSketch from this sketch by applying a given predicate to each entry. The predicate should return true for entries to keep.

func (*UpdateSketch[S, V]) IsEmpty ¶

func (s *UpdateSketch[S, V]) IsEmpty() bool

IsEmpty reports whether this sketch represents an empty set.

func (*UpdateSketch[S, V]) IsEstimationMode ¶

func (s *UpdateSketch[S, V]) IsEstimationMode() bool

IsEstimationMode reports whether the sketch is in estimation mode, as opposed to exact mode.

func (*UpdateSketch[S, V]) IsOrdered ¶

func (s *UpdateSketch[S, V]) IsOrdered() bool

IsOrdered reports whether retained hashes are sorted by hash value.

func (*UpdateSketch[S, V]) LgK ¶

func (s *UpdateSketch[S, V]) LgK() uint8

LgK returns the configured log2(k), where k is the nominal number of entries in the sketch.

func (*UpdateSketch[S, V]) LowerBound ¶

func (s *UpdateSketch[S, V]) LowerBound(numStdDevs uint8) (float64, error)

LowerBound returns the approximate lower error bound for the given number of standard deviations. numStdDevs should be 1, 2, or 3 for approximately 68%, 95%, or 99.7% confidence intervals.

func (*UpdateSketch[S, V]) LowerBoundFromSubset ¶

func (s *UpdateSketch[S, V]) LowerBoundFromSubset(numStdDevs uint8, numSubsetEntries uint32) (float64, error)

LowerBoundFromSubset returns the approximate lower error bound for the given number of standard deviations over a subset of retained hashes. numStdDevs specifies the confidence level (1, 2, or 3) corresponding to approximately 68%, 95%, or 99.7% confidence intervals. numSubsetEntries specifies the number of items, from {0, 1, ..., NumRetained()}, over which to estimate the bound.

func (*UpdateSketch[S, V]) NumRetained ¶

func (s *UpdateSketch[S, V]) NumRetained() uint32

NumRetained returns the number of hashes retained in the sketch.

func (*UpdateSketch[S, V]) Reset ¶

func (s *UpdateSketch[S, V]) Reset()

Reset resets the sketch to the initial empty state

func (*UpdateSketch[S, V]) ResizeFactor ¶

func (s *UpdateSketch[S, V]) ResizeFactor() theta.ResizeFactor

ResizeFactor returns a configured resize factor of the sketch

func (*UpdateSketch[S, V]) SeedHash ¶

func (s *UpdateSketch[S, V]) SeedHash() (uint16, error)

SeedHash returns the hash of the seed used to hash the input.

func (*UpdateSketch[S, V]) String ¶

func (s *UpdateSketch[S, V]) String(shouldPrintItems bool) string

String returns a human-readable summary of this sketch. If shouldPrintItems is true, the output includes all retained hashes.

func (*UpdateSketch[S, V]) Theta ¶

func (s *UpdateSketch[S, V]) Theta() float64

Theta returns theta as a fraction from 0 to 1, representing the effective sampling rate.

func (*UpdateSketch[S, V]) Theta64 ¶

func (s *UpdateSketch[S, V]) Theta64() uint64

Theta64 returns theta as a positive integer between 0 and math.MaxUint64.

func (*UpdateSketch[S, V]) Trim ¶

func (s *UpdateSketch[S, V]) Trim()

Trim removes retained entries in excess of the nominal size k (if any)

func (*UpdateSketch[S, V]) UpdateBytes ¶

func (s *UpdateSketch[S, V]) UpdateBytes(data []byte, value V) error

UpdateBytes updates this sketch with given data

func (*UpdateSketch[S, V]) UpdateFloat32 ¶

func (s *UpdateSketch[S, V]) UpdateFloat32(key float32, value V) error

UpdateFloat32 updates this sketch with a given floating point value

func (*UpdateSketch[S, V]) UpdateFloat64 ¶

func (s *UpdateSketch[S, V]) UpdateFloat64(key float64, value V) error

UpdateFloat64 updates this sketch with a given double-precision floating point value

func (*UpdateSketch[S, V]) UpdateInt16 ¶

func (s *UpdateSketch[S, V]) UpdateInt16(key int16, value V) error

UpdateInt16 updates this sketch with a given signed 16-bit integer

func (*UpdateSketch[S, V]) UpdateInt32 ¶

func (s *UpdateSketch[S, V]) UpdateInt32(key int32, value V) error

UpdateInt32 updates this sketch with a given signed 32-bit integer

func (*UpdateSketch[S, V]) UpdateInt64 ¶

func (s *UpdateSketch[S, V]) UpdateInt64(key int64, value V) error

UpdateInt64 updates this sketch with a given signed 64-bit integer

func (*UpdateSketch[S, V]) UpdateInt8 ¶

func (s *UpdateSketch[S, V]) UpdateInt8(key int8, value V) error

UpdateInt8 updates this sketch with a given signed 8-bit integer

func (*UpdateSketch[S, V]) UpdateString ¶

func (s *UpdateSketch[S, V]) UpdateString(key string, value V) error

UpdateString updates this sketch with a given string

func (*UpdateSketch[S, V]) UpdateUint16 ¶

func (s *UpdateSketch[S, V]) UpdateUint16(key uint16, value V) error

UpdateUint16 updates this sketch with a given unsigned 16-bit integer

func (*UpdateSketch[S, V]) UpdateUint32 ¶

func (s *UpdateSketch[S, V]) UpdateUint32(key uint32, value V) error

UpdateUint32 updates this sketch with a given unsigned 32-bit integer

func (*UpdateSketch[S, V]) UpdateUint64 ¶

func (s *UpdateSketch[S, V]) UpdateUint64(key uint64, value V) error

UpdateUint64 updates this sketch with a given unsigned 64-bit integer

func (*UpdateSketch[S, V]) UpdateUint8 ¶

func (s *UpdateSketch[S, V]) UpdateUint8(key uint8, value V) error

UpdateUint8 updates this sketch with a given unsigned 8-bit integer

func (*UpdateSketch[S, V]) UpperBound ¶

func (s *UpdateSketch[S, V]) UpperBound(numStdDevs uint8) (float64, error)

UpperBound returns the approximate upper error bound for the given number of standard deviations. numStdDevs should be 1, 2, or 3 for approximately 68%, 95%, or 99.7% confidence intervals.

func (*UpdateSketch[S, V]) UpperBoundFromSubset ¶

func (s *UpdateSketch[S, V]) UpperBoundFromSubset(numStdDevs uint8, numSubsetEntries uint32) (float64, error)

UpperBoundFromSubset returns the approximate upper error bound for the given number of standard deviations over a subset of retained hashes. numStdDevs specifies the confidence level (1, 2, or 3) corresponding to approximately 68%, 95%, or 99.7% confidence intervals. numSubsetEntries specifies the number of items, from {0, 1, ..., NumRetained()}, over which to estimate the bound.

type UpdateSketchOptionFunc ¶

type UpdateSketchOptionFunc func(*updateSketchOptions)

func WithUpdateSketchLgK ¶

func WithUpdateSketchLgK(lgK uint8) UpdateSketchOptionFunc

WithUpdateSketchLgK sets log2(k), where k is a nominal number of entries in the sketch

func WithUpdateSketchP ¶

func WithUpdateSketchP(p float32) UpdateSketchOptionFunc

WithUpdateSketchP sets the sampling probability (initial theta). The default is 1, so the sketch retains all entries until it reaches the limit, at which point it goes into estimation mode and reduces the effective sampling probability (theta) as necessary.

func WithUpdateSketchResizeFactor ¶

func WithUpdateSketchResizeFactor(rf theta.ResizeFactor) UpdateSketchOptionFunc

WithUpdateSketchResizeFactor sets a resize factor for the internal hash table (defaults to 8)

func WithUpdateSketchSeed ¶

func WithUpdateSketchSeed(seed uint64) UpdateSketchOptionFunc

WithUpdateSketchSeed sets the seed for the hash function. It should be used carefully, and only if needed. Sketches produced with different seeds are not compatible and cannot be mixed in set operations.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL