Documentation ¶
Index ¶
- Constants
- func ExtractDocTime(docRoot *insaneJSON.Root) (time.Time, []string)
- func IsItBinaryEncodedMetaData(b []byte) bool
- func PutDocMetasCompressor(c *DocsMetasCompressor)
- type Active
- func (f *Active) Append(docs, metas []byte, wg *sync.WaitGroup) (err error)
- func (f *Active) AppendIDs(ids []seq.ID) []uint32
- func (f *Active) Contains(id seq.MID) bool
- func (f *Active) DataProvider(ctx context.Context) (DataProvider, func())
- func (f *Active) GetAllDocuments() []uint32
- func (f *Active) Info() *Info
- func (f *Active) IsIntersecting(from, to seq.MID) bool
- func (f *Active) Offload(context.Context, storage.Uploader) (bool, error)
- func (f *Active) Release()
- func (f *Active) Replay(ctx context.Context) error
- func (f *Active) String() string
- func (f *Active) Suicide()
- func (f *Active) UpdateStats(minMID, maxMID seq.MID, docCount uint32, sizeCount uint64)
- type ActiveIndexer
- type ActiveWriter
- type AggLimits
- type BlockInfo
- type BlockOffsets
- type Config
- type DataProvider
- type DiskBlocksProducer
- type DiskBlocksWriter
- type DocProvider
- type DocsMetasCompressor
- type DocsPositions
- type EmptyDataProvider
- type FileWriter
- type Fraction
- type IndexCache
- type Info
- type Loader
- type MetaData
- type MetaToken
- type PSD
- type PreloadedData
- type Remote
- func (f *Remote) Contains(mid seq.MID) bool
- func (f *Remote) DataProvider(ctx context.Context) (DataProvider, func())
- func (f *Remote) Info() *Info
- func (f *Remote) IsIntersecting(from, to seq.MID) bool
- func (f *Remote) Offload(context.Context, storage.Uploader) (bool, error)
- func (f *Remote) String() string
- func (f *Remote) Suicide()
- type SealParams
- type Sealed
- func (f *Sealed) Contains(id seq.MID) bool
- func (f *Sealed) DataProvider(ctx context.Context) (DataProvider, func())
- func (f *Sealed) Info() *Info
- func (f *Sealed) IsIntersecting(from, to seq.MID) bool
- func (f *Sealed) Offload(ctx context.Context, u storage.Uploader) (bool, error)
- func (f *Sealed) String() string
- func (f *Sealed) Suicide()
- type SearchConfig
- type SeqIDCmp
- type TokenLIDs
- type TokenList
- func (tl *TokenList) Append(tokens [][]byte, fieldsLengths []int, tokenLIDsPlaces []*TokenLIDs) []*TokenLIDs
- func (tl *TokenList) FindPattern(ctx context.Context, t parser.Token, tids []uint32) ([]uint32, error)
- func (tl *TokenList) GetAllTokenLIDs() *TokenLIDs
- func (tl *TokenList) GetFieldSizes() map[string]uint32
- func (tl *TokenList) GetTIDsByField(f string) []uint32
- func (tl *TokenList) GetValByTID(tid uint32) []byte
- func (tl *TokenList) Provide(tid uint32) *TokenLIDs
- func (tl *TokenList) Stop()
- type UInt64s
Constants ¶
const DistributionBucket = time.Minute
const DistributionMaxInterval = 24 * time.Hour
const DistributionSpreadThreshold = 10 * time.Minute
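These constants appear to parameterize the seq.MIDsDistribution stored in Info. A hedged sketch of how a document timestamp might map to a distribution bucket — this illustrates the arithmetic only, not the package's actual logic; bucketIndex is a hypothetical helper:

// bucketIndex places docTime into a DistributionBucket-wide slot relative
// to the fraction start, and reports false outside DistributionMaxInterval.
func bucketIndex(docTime, from time.Time) (int, bool) {
	d := docTime.Sub(from)
	if d < 0 || d >= DistributionMaxInterval {
		return 0, false
	}
	return int(d / DistributionBucket), true // e.g. 24h / 1m = at most 1440 buckets
}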
Variables ¶
This section is empty.
Functions ¶
func ExtractDocTime ¶
func ExtractDocTime(docRoot *insaneJSON.Root) (time.Time, []string)
ExtractDocTime extracts the timestamp from a document. It searches for one of the supported field names and parses the value using the supported formats. If no such field is found, or the value cannot be parsed, it returns time.Now().
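A minimal usage sketch; the insane-json import path, the document field name, and the meaning of the returned []string are assumptions:

root := insaneJSON.Spawn() // insaneJSON "github.com/ozontech/insane-json" (assumed path)
defer insaneJSON.Release(root)

if err := root.DecodeString(`{"time": "2024-01-02T15:04:05Z", "msg": "hi"}`); err != nil {
	panic(err)
}

// The []string result is presumably the path of the field the time was
// taken from; it would be empty when falling back to time.Now().
t, field := ExtractDocTime(root)
fmt.Println(t, field)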
func PutDocMetasCompressor ¶
func PutDocMetasCompressor(c *DocsMetasCompressor)
Types ¶
type Active ¶
type Active struct {
Config *Config
BaseFileName string
MIDs *UInt64s
RIDs *UInt64s
DocBlocks *UInt64s
TokenList *TokenList
DocsPositions *DocsPositions
// contains filtered or unexported fields
}
func (*Active) DataProvider ¶
func (f *Active) DataProvider(ctx context.Context) (DataProvider, func())
func (*Active) GetAllDocuments ¶
func (f *Active) GetAllDocuments() []uint32
type ActiveIndexer ¶
type ActiveIndexer struct {
// contains filtered or unexported fields
}
func NewActiveIndexer ¶
func NewActiveIndexer(workerCount, chLen int) *ActiveIndexer
func (*ActiveIndexer) Start ¶
func (ai *ActiveIndexer) Start()
func (*ActiveIndexer) Stop ¶
func (ai *ActiveIndexer) Stop()
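NewActiveIndexer, Start, and Stop suggest a worker-pool lifecycle; a hedged sketch of the call order, with illustrative sizes:

ai := NewActiveIndexer(4, 1024) // 4 workers, channel length 1024 (illustrative values)
ai.Start()                      // spin up the indexing workers
defer ai.Stop()                 // drain and shut the workers down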
type ActiveWriter ¶
type ActiveWriter struct {
// contains filtered or unexported fields
}
func NewActiveWriter ¶
func NewActiveWriter(docsFile, metaFile *os.File, docsOffset, metaOffset int64, skipFsync bool) *ActiveWriter
func (*ActiveWriter) Stop ¶
func (a *ActiveWriter) Stop()
type BlockOffsets ¶ added in v0.59.0
type BlockOffsets struct {
IDsTotal uint32 // todo: the best place for this field is Info block
Offsets []uint64
}
func (*BlockOffsets) Pack ¶ added in v0.59.0
func (b *BlockOffsets) Pack(buf []byte) []byte
func (*BlockOffsets) Unpack ¶ added in v0.59.0
func (b *BlockOffsets) Unpack(data []byte) error
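Pack and Unpack are the usual append-and-parse pair; a roundtrip sketch, assuming Pack appends to buf and returns the extended slice, as its signature suggests:

src := BlockOffsets{IDsTotal: 3, Offsets: []uint64{0, 4096, 8192}}
buf := src.Pack(nil) // nil starts a fresh buffer

var dst BlockOffsets
if err := dst.Unpack(buf); err != nil {
	panic(err)
}
// dst should now mirror src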
type Config ¶
type Config struct {
Search SearchConfig
SkipSortDocs bool
KeepMetaFile bool
}
type DataProvider ¶
type DiskBlocksProducer ¶
type DiskBlocksProducer struct {
// contains filtered or unexported fields
}
func NewDiskBlocksProducer ¶
func NewDiskBlocksProducer() *DiskBlocksProducer
type DiskBlocksWriter ¶
type DiskBlocksWriter struct {
// contains filtered or unexported fields
}
func NewSealedBlockWriter ¶
func NewSealedBlockWriter(ws io.WriteSeeker) *DiskBlocksWriter
func (*DiskBlocksWriter) NewBlockFormer ¶
func (w *DiskBlocksWriter) NewBlockFormer(name string, size int) *storage.BlockFormer
func (*DiskBlocksWriter) WriteRegistryBlock ¶
func (w *DiskBlocksWriter) WriteRegistryBlock() error
type DocProvider ¶
type DocProvider struct {
DocCount int
Docs []byte
Metas []byte
// contains filtered or unexported fields
}
func NewDocProvider ¶
func NewDocProvider() *DocProvider
func (*DocProvider) Append ¶
func (dp *DocProvider) Append(doc []byte, docRoot *insaneJSON.Root, id seq.ID, tokens []seq.Token)
func (*DocProvider) TryReset ¶
func (dp *DocProvider) TryReset()
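A hedged sketch of accumulating one document; the seq.ID value and the nil token slice are placeholders, since in the real write path both come from the ingest pipeline:

dp := NewDocProvider()

root := insaneJSON.Spawn()
defer insaneJSON.Release(root)

doc := []byte(`{"level": "error", "message": "boom"}`)
if err := root.DecodeBytes(doc); err != nil {
	panic(err)
}

var id seq.ID // zero value, purely for illustration
dp.Append(doc, root, id, nil)

// dp.Docs and dp.Metas now hold the accumulated blocks for bulk insert.
dp.TryReset() // hand the buffers back for reuse once they are consumed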
type DocsMetasCompressor ¶
type DocsMetasCompressor struct {
// contains filtered or unexported fields
}
func GetDocsMetasCompressor ¶
func GetDocsMetasCompressor(docsCompressLevel, metaCompressLevel int) *DocsMetasCompressor
func (*DocsMetasCompressor) CompressDocsAndMetas ¶
func (c *DocsMetasCompressor) CompressDocsAndMetas(docs, meta []byte)
CompressDocsAndMetas prepares the docs and meta blocks for bulk insert.
func (*DocsMetasCompressor) DocsMetas ¶
func (c *DocsMetasCompressor) DocsMetas() ([]byte, []byte)
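GetDocsMetasCompressor and PutDocsMetasCompressor read like a pool pair; a hedged lifecycle sketch (the zstd levels are illustrative):

c := GetDocsMetasCompressor(3, 3) // docs level 3, meta level 3

c.CompressDocsAndMetas(docs, metas) // e.g. blocks accumulated by a DocProvider
compDocs, compMetas := c.DocsMetas()

// ... ship compDocs / compMetas in the bulk request ...

// Return the compressor once the returned slices are no longer needed
// (assumption: they may alias pooled buffers).
PutDocMetasCompressor(c)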
type DocsPositions ¶
type DocsPositions struct {
// contains filtered or unexported fields
}
func NewSyncDocsPositions ¶
func NewSyncDocsPositions() *DocsPositions
func (*DocsPositions) SetMultiple ¶
SetMultiple returns a slice of the added IDs.
type EmptyDataProvider ¶
type EmptyDataProvider struct{}
func (EmptyDataProvider) Search ¶
func (EmptyDataProvider) Search(params processor.SearchParams) (*seq.QPR, error)
type FileWriter ¶
type FileWriter struct {
// contains filtered or unexported fields
}
FileWriter optimizes sequential writing and fsync calls for concurrent writers.
The write offset is reserved strictly sequentially using an atomic counter; the data is written, and an fsync request is then issued. The request waits if an fsync triggered by earlier requests is still in progress. During this wait, further fsync requests may arrive and wait for that same previous fsync to complete. Once it does, a new fsync is performed, after which all waiting requests are notified of its success or failure.
This results in one fsync system call for several writers performing a write at approximately the same time.
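A self-contained sketch of that coalescing pattern; this illustrates the technique described above, not the package's actual implementation:

import (
	"os"
	"sync"
	"sync/atomic"
)

type groupSyncer struct {
	f          *os.File
	nextOffset int64 // reserved strictly sequentially via atomic add
	mu         sync.Mutex
	syncing    bool
	waiters    []chan error
}

// write reserves a sequential offset, writes, and waits for a coalesced fsync.
func (g *groupSyncer) write(data []byte) error {
	off := atomic.AddInt64(&g.nextOffset, int64(len(data))) - int64(len(data))
	if _, err := g.f.WriteAt(data, off); err != nil {
		return err
	}
	return g.requestSync()
}

func (g *groupSyncer) requestSync() error {
	ch := make(chan error, 1)
	g.mu.Lock()
	g.waiters = append(g.waiters, ch)
	if !g.syncing { // no fsync loop running: start one
		g.syncing = true
		go g.syncLoop()
	}
	g.mu.Unlock()
	return <-ch // resolves after an fsync that covers our write
}

func (g *groupSyncer) syncLoop() {
	for {
		g.mu.Lock()
		batch := g.waiters
		g.waiters = nil
		if len(batch) == 0 {
			g.syncing = false
			g.mu.Unlock()
			return
		}
		g.mu.Unlock()

		err := g.f.Sync() // one syscall answers every writer batched so far
		for _, ch := range batch {
			ch <- err
		}
	}
}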
func NewFileWriter ¶
func NewFileWriter(ws writeSyncer, offset int64, skipSync bool) *FileWriter
func (*FileWriter) Stop ¶
func (fs *FileWriter) Stop()
type IndexCache ¶
type IndexCache struct {
Registry *cache.Cache[[]byte]
MIDs *cache.Cache[[]byte]
RIDs *cache.Cache[[]byte]
Params *cache.Cache[seqids.BlockParams]
Tokens *cache.Cache[*token.Block]
TokenTable *cache.Cache[token.Table]
LIDs *cache.Cache[*lids.Block]
}
func (*IndexCache) Release ¶
func (s *IndexCache) Release()
type Info ¶
type Info struct {
Path string `json:"name"`
Ver string `json:"ver"`
BinaryDataVer config.BinaryDataVersion `json:"binary_data_ver"`
DocsTotal uint32 `json:"docs_total"`
DocsOnDisk uint64 `json:"docs_on_disk"` // how much compressed docs data is stored on disk
DocsRaw uint64 `json:"docs_raw"` // how much raw docs data is appended
MetaOnDisk uint64 `json:"meta_on_disk"` // how much compressed metadata is stored on disk
IndexOnDisk uint64 `json:"index_on_disk"` // how much compressed index data is stored on disk
ConstRegularBlockSize int `json:"const_regular_block_size"`
ConstIDsPerBlock int `json:"const_ids_per_block"`
ConstLIDBlockCap int `json:"const_lid_block_cap"`
From seq.MID `json:"from"`
To seq.MID `json:"to"`
CreationTime uint64 `json:"creation_time"`
SealingTime uint64 `json:"sealing_time"`
Distribution *seq.MIDsDistribution `json:"distribution"`
}
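From and To bound the fraction's MID range, so the fractions' IsIntersecting methods presumably reduce to a closed-interval overlap test; a sketch with a hypothetical helper, not the package's code:

// intersects reports whether [info.From, info.To] overlaps [from, to].
func intersects(info *Info, from, to seq.MID) bool {
	return info.From <= to && from <= info.To
}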
func (*Info) BuildDistribution ¶
func (*Info) InitEmptyDistribution ¶
type MetaData ¶
type MetaData struct {
ID seq.ID
// Size of an uncompressed document in bytes.
Size uint32
Tokens []MetaToken
}
func (*MetaData) MarshalBinaryTo ¶
func (*MetaData) UnmarshalBinary ¶
type MetaToken ¶
func (*MetaToken) MarshalBinaryTo ¶
type PSD ¶
type PSD int // emulates hard shutdown on different stages of fraction deletion, used for tests
type PreloadedData ¶
type PreloadedData struct {
// contains filtered or unexported fields
}
func Seal ¶
func Seal(f *Active, params SealParams) (*PreloadedData, error)
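Seal pairs with NewSealedPreloaded: the active fraction is sealed into PreloadedData, from which the Sealed fraction is constructed without re-reading the just-written files. A hedged sketch, with all variables assumed to be in scope:

preloaded, err := Seal(active, sealParams)
if err != nil {
	panic(err)
}
sealed := NewSealedPreloaded(baseFile, preloaded, readLimiter, indexCache, docsCache, cfg)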
type Remote ¶ added in v0.60.0
A Remote fraction is a fraction backed by remote storage.
The structure of a Remote fraction is almost identical to the Sealed one: they share the same on-disk binary layout, access methods, and other logic. Keeping Remote as a separate type simply makes it easy to distinguish local fractions from remote ones.
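Active, Sealed, and Remote expose the same method set (Contains, DataProvider, Info, IsIntersecting, Offload, ...), so callers can hold them behind one fraction interface and branch only where locality matters; a hedged sketch, assuming such an interface exists in the package and frak is a value of it:

switch frak.(type) {
case *Active:
	// local, still accepting writes
case *Sealed:
	// local, immutable, fully on disk
case *Remote:
	// same binary layout as Sealed, but backed by remote storage
}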
func (*Remote) DataProvider ¶ added in v0.60.0
func (f *Remote) DataProvider(ctx context.Context) (DataProvider, func())
func (*Remote) IsIntersecting ¶ added in v0.60.0
func (f *Remote) IsIntersecting(from, to seq.MID) bool
type SealParams ¶
type SealParams struct {
IDsZstdLevel int
LIDsZstdLevel int
TokenListZstdLevel int
DocsPositionsZstdLevel int
TokenTableZstdLevel int
DocBlocksZstdLevel int // DocBlocksZstdLevel is the zstd compress level of each document block.
DocBlockSize int // DocBlockSize is decompressed payload size of document block.
}
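An illustrative literal; the levels and block size are deployment-dependent placeholders, not recommendations:

params := SealParams{
	IDsZstdLevel:           3,
	LIDsZstdLevel:          3,
	TokenListZstdLevel:     3,
	DocsPositionsZstdLevel: 3,
	TokenTableZstdLevel:    3,
	DocBlocksZstdLevel:     3,
	DocBlockSize:           4 << 20, // 4 MiB of uncompressed docs per block
}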
type Sealed ¶
type Sealed struct {
Config *Config
BaseFileName string
// test-only knob
PartialSuicideMode PSD
// contains filtered or unexported fields
}
func NewSealed ¶
func NewSealed(baseFile string, readLimiter *storage.ReadLimiter, indexCache *IndexCache, docsCache *cache.Cache[[]byte], info *Info, config *Config) *Sealed
func NewSealedPreloaded ¶
func NewSealedPreloaded(baseFile string, preloaded *PreloadedData, rl *storage.ReadLimiter, indexCache *IndexCache, docsCache *cache.Cache[[]byte], config *Config) *Sealed
func (*Sealed) DataProvider ¶
func (f *Sealed) DataProvider(ctx context.Context) (DataProvider, func())
type SearchConfig ¶
type SearchConfig struct {
AggLimits AggLimits
}
type TokenLIDs ¶
type TokenLIDs struct {
// contains filtered or unexported fields
}
func (*TokenLIDs) PutLIDsInQueue ¶
type TokenList ¶
func NewActiveTokenList ¶
func (*TokenList) FindPattern ¶
func (tl *TokenList) FindPattern(ctx context.Context, t parser.Token, tids []uint32) ([]uint32, error)
func (*TokenList) GetAllTokenLIDs ¶
func (tl *TokenList) GetAllTokenLIDs() *TokenLIDs
func (*TokenList) GetFieldSizes ¶
func (tl *TokenList) GetFieldSizes() map[string]uint32
func (*TokenList) GetTIDsByField ¶
func (tl *TokenList) GetTIDsByField(f string) []uint32
func (*TokenList) GetValByTID ¶
func (tl *TokenList) GetValByTID(tid uint32) []byte
Source Files ¶
- active.go
- active_docs_positions.go
- active_ids.go
- active_index.go
- active_indexer.go
- active_lids.go
- active_sealer.go
- active_token_list.go
- active_writer.go
- block_info.go
- block_offsets.go
- compress.go
- config.go
- disk_blocks.go
- disk_blocks_producer.go
- disk_blocks_writer.go
- doc_provider.go
- empty_data_provider.go
- file_writer.go
- fraction.go
- index_cache.go
- info.go
- inverser.go
- meta_data_collector.go
- remote.go
- seal_stats.go
- sealed.go
- sealed_index.go
- sealed_loader.go