segment

package
v0.1.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 28, 2023 License: MIT Imports: 12 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// Suffixes for the term, inverted, forward, and dict databases.
var (
	TermDbSuffix     = ".term"     // term db suffix
	InvertedDbSuffix = ".inverted" // inverted db suffix
	ForwardDbSuffix  = ".forward"  // forward db suffix
	DictDbSuffix     = ".dict"     // dict db suffix
)

Functions

func CreateNewInvertedIndex

func CreateNewInvertedIndex(token analyzer.Tokenization, docCount int64) *types.InvertedIndexValue

CreateNewInvertedIndex 创建倒排索引

func CreateNewPostingsList

func CreateNewPostingsList(docId int64) *types.PostingsList

CreateNewPostingsList 创建倒排列表(postings list)

func GetDbName

func GetDbName(segId SegId) (string, string, string, string)

GetDbName 获取db的路径+名称

func InitSegmentDb

func InitSegmentDb(segId SegId) (invertedDb *storage.InvertedDB, forwardDb *storage.ForwardDB, dictDb *storage.DictDB, err error)

InitSegmentDb 读取对应segment文件下的db

func MergeInvertedIndex

func MergeInvertedIndex(base, toBeAdd InvertedIndexHash)

MergeInvertedIndex 合并两个倒排索引

func MergeKForwardSegments

func MergeKForwardSegments(seg *Segment, list []*TermNode, chList []chan storage.KvInfo) error

MergeKForwardSegments 合并正排

func MergePostings

func MergePostings(pa, pb *types.PostingsList) *types.PostingsList

MergePostings 合并两个posting

func Token2PostingsLists

func Token2PostingsLists(bufInvertHash InvertedIndexHash, token analyzer.Tokenization, docId int64) (err error)

Token2PostingsLists 词条 转化成 倒排索引表

Types

type InvertedIndexHash

// InvertedIndexHash is an in-memory inverted-index table: a map from a string
// key to its *types.InvertedIndexValue.
// NOTE(review): the key is presumably the token/term text — confirm against callers.
type InvertedIndexHash map[string]*types.InvertedIndexValue

InvertedIndexHash 倒排hash

func MergeKTermSegments

func MergeKTermSegments(list []*TermNode, chList []chan storage.KvInfo) (res InvertedIndexHash, err error)

MergeKTermSegments 多路归并,合并term数据,合并后需要一起处理合并倒排表数据

type LoserTree

type LoserTree struct {
	// contains filtered or unexported fields
}

LoserTree 败者树

func NewSegLoserTree

func NewSegLoserTree(leaves []*TermNode, leavesCh []chan storage.KvInfo) *LoserTree

func (*LoserTree) Pop

func (lt *LoserTree) Pop() (res *TermNode)

Pop 弹出最小值

type Mode

// Mode selects the operating mode: search, index, or segment merge.
type Mode int64

Mode 查询 or 索引模式

// Supported Mode values.
const (
	SearchMode Mode = 1 // search (query) mode
	IndexMode  Mode = 2 // index mode
	MergeMode  Mode = 3 // segment merge mode
)

type SegId

// SegId is the integer identifier of a segment; it is used as the key type
// for segment maps.
type SegId int64

func NewSegments

func NewSegments(meta *SegMeta, mode Mode) (SegId, map[SegId]*Segment)

NewSegments 创建新的segments 更新next seg

type SegInfo

// SegInfo describes a single segment.
type SegInfo struct {
	SegId            SegId `json:"seg_name"`           // segment name prefix
	SegSize          int64 `json:"seg_size"`           // number of docs written
	InvertedFileSize int64 `json:"inverted_file_size"` // size of the inverted file written
	ForwardFileSize  int64 `json:"forward_file_size"`  // size of the forward file written
	DictFileSize     int64 `json:"dict_file_size"`     // size of the dict file written
	DelSize          int64 `json:"del_size"`           // number of deleted documents
	DelFileSize      int64 `json:"del_file_size"`      // size of the deleted-documents file
	TermSize         int64 `json:"term_size"`          // term size (NOTE(review): presumably the number of terms — confirm)
	TermFileSize     int64 `json:"term_file_size"`     // size of the term file
	ReferenceCount   int64 `json:"reference_count"`    // reference count
	IsReading        bool  `json:"is_reading"`         // whether the segment is currently being read
	IsMerging        bool  `json:"is_merging"`         // whether the segment is currently taking part in a merge
}

SegInfo 段信息

type SegMeta

// SegMeta holds the segment metadata.
type SegMeta struct {
	// NextSeg is the next segment id.
	// NOTE(review): presumably the id assigned to the next new segment — confirm.
	NextSeg SegId `json:"next_seg"`
	// SegCount is the segment count.
	// NOTE(review): presumably the number of entries in SegInfo — confirm.
	SegCount int64 `json:"seg_count"`
	// SegInfo maps each segment id to its SegInfo record.
	SegInfo map[SegId]*SegInfo `json:"seg_info"` // TODO replace sync.map

	// Embedded mutex. Because of it, a SegMeta must not be copied after first use.
	// NOTE(review): presumably guards the fields above — confirm against the methods.
	sync.Mutex
}

SegMeta 元数据

func (*SegMeta) NewSegmentItem

func (m *SegMeta) NewSegmentItem() error

func (*SegMeta) UpdateSegMeta

func (m *SegMeta) UpdateSegMeta(segId SegId, indexCount int64) error

UpdateSegMeta 更新段信息

type Segment

// Segment bundles the three databases of one segment by embedding them.
type Segment struct {
	*storage.ForwardDB  // forward index database
	*storage.InvertedDB // inverted index database
	*storage.DictDB     // stores the trie tree
}

func NewSegment

func NewSegment(segId SegId) *Segment

func (*Segment) Close

func (e *Segment) Close()

Close --

func (*Segment) FetchPostings

func (e *Segment) FetchPostings(token string) (p *types.InvertedIndexValue, err error)

FetchPostings 通过 token 读取倒排表数据,返回倒排索引

func (*Segment) FlushInvertedIndex added in v0.1.0

func (e *Segment) FlushInvertedIndex(PostingsHashBuf InvertedIndexHash) (err error)

FlushInvertedIndex 落盘操作

func (*Segment) FlushTokenDict added in v0.1.0

func (e *Segment) FlushTokenDict(trieTree *trie.Trie) (err error)

FlushTokenDict 刷新写入 token dict

type SkipListInt

type SkipListInt struct {
	SkipNodeInt
	// contains filtered or unexported fields
}

func NewSkipListInt

func NewSkipListInt(skip ...int) *SkipListInt

type SkipNodeInt

type SkipNodeInt struct {
	// contains filtered or unexported fields
}

type TermNode

// TermNode embeds a *storage.KvInfo together with a pointer to a Segment.
// It is the element type of the leaves passed to NewSegLoserTree and of the
// lists passed to MergeKTermSegments and MergeKForwardSegments.
type TermNode struct {
	*storage.KvInfo
	// Seg is the associated segment.
	// NOTE(review): presumably the segment the KvInfo was read from — confirm.
	Seg *Segment
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL