Documentation
¶
Index ¶
- type ByKey
- type Data2Starrocks
- type DictTireTree
- type Document
- type InputDataList
- type InvertedIndexValue
- type InvertedInfo
- type KeyValue
- type MapReduceTask
- type MasterTask
- type MasterTaskStatus
- type PostingsList
- type SearchItem
- type SearchItemList
- type State
- type Task
- type TermValue
- type Tokenization
- type UserTokenData
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Data2Starrocks ¶ added in v0.1.1
type DictTireTree ¶ added in v0.1.1
type Document ¶ added in v0.1.1
// Document is the document format.
type Document struct {
DocId int64 `json:"doc_id"` // document identifier; JSON key "doc_id"
Title string `json:"title"` // document title; JSON key "title"
Body string `json:"body"` // document body; JSON key "body"
}
Document 文档格式
func (Document) MarshalEasyJSON ¶ added in v0.1.1
MarshalEasyJSON supports easyjson.Marshaler interface
func (Document) MarshalJSON ¶ added in v0.1.1
MarshalJSON supports json.Marshaler interface
func (*Document) UnmarshalEasyJSON ¶ added in v0.1.1
UnmarshalEasyJSON supports easyjson.Unmarshaler interface
func (*Document) UnmarshalJSON ¶ added in v0.1.1
UnmarshalJSON supports json.Unmarshaler interface
type InputDataList ¶ added in v0.1.1
type InvertedIndexValue ¶ added in v0.1.1
// InvertedIndexValue is an inverted-index entry.
type InvertedIndexValue struct {
Token string `json:"token"` // JSON key "token"
PostingsList *PostingsList `json:"postings_list"` // pointer to a PostingsList (type body not shown here)
DocCount int64 `json:"doc_count"` // JSON key "doc_count"
PositionCount int64 `json:"position_count"` // used by queries; not used when writing for now
TermValues *TermValue `json:"term_values"` // pointer to a single TermValue, despite the plural field name
}
InvertedIndexValue 倒排索引
type InvertedInfo ¶ added in v0.1.1
type MapReduceTask ¶ added in v0.1.1
// MapReduceTask carries a task's input file, state, reducer and task counts,
// intermediate file locations and output location.
type MapReduceTask struct {
Input string `json:"input"` // input file
TaskState State `json:"task_state"` // state
NReducer int `json:"n_reducer"` // number of reducers
TaskNumber int `json:"task_number"` // number of tasks
Intermediates []string `json:"intermediates"` // storage locations of the files produced after map
Output string `json:"output"` // location the output is written to
}
type MasterTask ¶ added in v0.1.1
// MasterTask pairs a MasterTaskStatus and a start time with a pointer to a
// MapReduceTask. It declares no JSON tags.
type MasterTask struct {
TaskStatus MasterTaskStatus // one of the MasterTaskStatus values
StartTime time.Time // NOTE(review): presumably when the task was started — confirm against the code that sets it
TaskReference *MapReduceTask // pointer to the associated MapReduceTask
}
type MasterTaskStatus ¶ added in v0.1.1
// MasterTaskStatus is the integer-coded status stored in MasterTask.TaskStatus.
type MasterTaskStatus int

// The named statuses start at 1 (iota + 1), so the zero value of
// MasterTaskStatus does not equal any of them.
const (
	Idle       MasterTaskStatus = iota + 1 // not started
	InProgress                             // in progress
	Completed                              // completed
)
type PostingsList ¶ added in v0.1.1
type SearchItem ¶ added in v0.1.1
// SearchItem is a query result.
type SearchItem struct {
DocId int64 `json:"doc_id"` // JSON key "doc_id"
Content string `json:"content"` // JSON key "content"
Title string `json:"title"` // JSON key "title"
Score float64 `json:"score"` // score of this term for this document, i.e. how important the term is
DocCount int64 `json:"doc_count"` // how many times this term appears in the document
ContentScore float64 `json:"content_score"` // score of this document
}
SearchItem 查询结果
type SearchItemList ¶ added in v0.1.1
// SearchItemList is a slice of *SearchItem. The Len, Less and Swap methods
// declared on it match the method set of sort.Interface; the ordering that
// Less applies is not visible here.
type SearchItemList []*SearchItem
func (SearchItemList) Len ¶ added in v0.1.1
func (ds SearchItemList) Len() int
func (SearchItemList) Less ¶ added in v0.1.1
func (ds SearchItemList) Less(i, j int) bool
func (SearchItemList) Swap ¶ added in v0.1.1
func (ds SearchItemList) Swap(i, j int)
type Tokenization ¶ added in v0.1.1
// Tokenization is the structure returned by tokenization.
type Tokenization struct {
Token string // term
// Position int64 // position of the term in the text // TODO: add later
// Offset int64 // offset
DocId int64 // NOTE(review): presumably the ID of the document the term came from — confirm
}
Tokenization 分词返回结构
type UserTokenData ¶
Click to show internal directories.
Click to hide internal directories.