Documentation
¶
Index ¶
- func CreateSnapshot(snapshot *Snapshot) error
- func CreateTask(task *Task) error
- func DeleteSnapshot(snapshot *Snapshot) error
- func DeleteTask(id string) error
- func GetTaskStatusText(status TaskStatus) string
- func IncrementDomainLinkCount(host string) error
- func UpdateTask(task *Task)
- type Aggregation
- type Bucket
- type Domain
- type DomainSetting
- type IndexDocument
- type KV
- type LinkGroup
- type PageLink
- type Seed
- type Snapshot
- type Task
- type TaskSetting
- type TaskStatus
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func CreateSnapshot ¶
func CreateTask ¶
func DeleteSnapshot ¶ added in v0.9.0
func DeleteTask ¶
func GetTaskStatusText ¶ added in v0.9.0
func GetTaskStatusText(status TaskStatus) string
func IncrementDomainLinkCount ¶
IncrementDomainLinkCount updates the domain's link count.
func UpdateTask ¶
func UpdateTask(task *Task)
Types ¶
type Aggregation ¶ added in v0.9.0
// Aggregation is a container for a list of Bucket values.
type Aggregation struct {
// Buckets is serialized under the JSON key "buckets" and is omitted when the slice is empty.
Buckets []Bucket `json:"buckets,omitempty"`
}
type Domain ¶
// Domain is the stored record for a single domain host.
type Domain struct {
// Host identifies the record: it is tagged as the unique storm id and as the
// non-null, unique gorm primary key.
Host string `storm:"id,unique" json:"host,omitempty" gorm:"not null;unique;primary_key" index:"id"`
// LinksCount is the number of links recorded for this host.
// NOTE(review): presumably the value maintained by IncrementDomainLinkCount — confirm.
LinksCount int64 `json:"links_count,omitempty"`
Favicon string `json:"favicon,omitempty"`
// Settings is optional (nil is dropped from JSON) and carries the storm "inline" tag.
Settings *DomainSetting `storm:"inline" json:"settings,omitempty"`
// Created and Updated are storm-indexed timestamps; being pointers, nil values
// are omitted from the JSON output.
Created *time.Time `storm:"index" json:"created,omitempty"`
Updated *time.Time `storm:"index" json:"updated,omitempty"`
}
Domain is the struct describing a domain host.
type IndexDocument ¶
// IndexDocument is the envelope used to build a document for indexing.
// NOTE(review): the JSON keys (_index, _id, _source, highlight) resemble an
// Elasticsearch document/hit — confirm against the indexing backend.
type IndexDocument struct {
// Index is serialized as "_index".
Index string `json:"_index,omitempty"`
// Id is serialized as "_id".
Id string `json:"_id,omitempty"`
// Source is the free-form document body, serialized as "_source".
Source map[string]interface{} `json:"_source,omitempty"`
// Highlight maps a string key to a list of values, serialized as "highlight".
Highlight map[string][]interface{} `json:"highlight,omitempty"`
}
IndexDocument is used to construct an indexing document.
type Seed ¶
// Seed is a URL together with its reference URL and its depth/breadth values.
type Seed struct {
// The seed URL may not be cleaned and may be missing the domain part; the
// reference is needed to provide the complete URL information.
Url string `storm:"index" json:"url,omitempty" gorm:"type:varchar(500)"`
// Reference is serialized as "reference_url".
Reference string `json:"reference_url,omitempty"`
// Depth and Breadth have no omitempty option, so they are always written to
// JSON, including when they are zero.
Depth int `storm:"index" json:"depth"`
Breadth int `storm:"index" json:"breadth"`
}
func TaskSeedFromBytes ¶
func (Seed) MustGetBytes ¶
type Snapshot ¶
// Snapshot is the record of one fetched page: identifiers, raw response data,
// extracted content and content hashes.
//
// Tag conventions used below:
//   - json:"-" excludes the field from JSON encoding.
//   - gorm:"-" marks the field to be ignored by gorm, so only the fields
//     without that tag are candidates for database columns.
type Snapshot struct {
// ID is the non-null, unique gorm primary key.
ID string `json:"id,omitempty" gorm:"not null;unique;primary_key" index:"id"`
Version int `json:"version,omitempty"`
Url string `json:"url,omitempty"`
// TaskID is serialized as "task_id".
// NOTE(review): presumably the ID of the owning Task — confirm.
TaskID string `json:"task_id,omitempty"`
Path string `json:"path,omitempty" gorm:"-"` //path of this file
File string `json:"file,omitempty" gorm:"-"` //filename of this page
// StatusCode, Payload, Headers, Metadata and Parameters are excluded from both
// JSON and gorm; they exist only on the in-memory value.
StatusCode int `json:"-" gorm:"-"`
Payload []byte `json:"-" gorm:"-"`
Size uint64 `json:"size,omitempty"`
Headers map[string][]string `json:"-" gorm:"-"`
Metadata *map[string]interface{} `json:"-" gorm:"-"`
Parameters []KV `json:"-" gorm:"-"`
// Language is serialized under the shorter key "lang".
Language string `json:"lang,omitempty" gorm:"-"`
Title string `json:"title,omitempty"`
Summary string `json:"summary,omitempty" gorm:"-"`
Text string `json:"text,omitempty" gorm:"-"`
ContentType string `json:"content_type,omitempty"`
Tags []string `json:"tags,omitempty" gorm:"-"`
Links LinkGroup `json:"links,omitempty" gorm:"-"`
// Images splits page links into internal and external lists.
// NOTE(review): encoding/json's omitempty never treats a struct value as
// empty, so "images" is always emitted even when both lists are empty.
Images struct {
Internal []PageLink `json:"internal,omitempty"`
External []PageLink `json:"external,omitempty"`
} `json:"images,omitempty" gorm:"-"`
// H1-H5, Bold and Italic are string lists that are serialized to JSON only.
// NOTE(review): presumably text extracted from the matching HTML elements — confirm.
H1 []string `json:"h1,omitempty" gorm:"-"`
H2 []string `json:"h2,omitempty" gorm:"-"`
H3 []string `json:"h3,omitempty" gorm:"-"`
H4 []string `json:"h4,omitempty" gorm:"-"`
H5 []string `json:"h5,omitempty" gorm:"-"`
Bold []string `json:"bold,omitempty" gorm:"-"`
Italic []string `json:"italic,omitempty" gorm:"-"`
Classifications []string `json:"classifications,omitempty" gorm:"-"`
EnrichedFeatures *map[string]interface{} `json:"enriched_features,omitempty" gorm:"-"`
// Hash and SimHash are serialized as "hash" and "sim_hash".
Hash string `json:"hash,omitempty"`
SimHash string `json:"sim_hash,omitempty"`
// Created is a pointer, so a nil value is omitted from JSON.
Created *time.Time `json:"created,omitempty"`
}
func GetSnapshot ¶ added in v0.9.0
func GetSnapshotByField ¶ added in v0.9.0
type Task ¶
// Task is a unit of work built around a Seed, with status, scheduling
// timestamps and snapshot bookkeeping fields.
type Task struct {
// Seed is embedded, so its Url, Reference, Depth and Breadth fields are
// promoted onto Task.
Seed
// ID is the non-null, unique gorm primary key.
ID string `gorm:"not null;unique;primary_key" json:"id" index:"id"`
Host string `gorm:"index" json:"host"`
Schema string `json:"schema,omitempty"`
OriginalUrl string `json:"original_url,omitempty"`
// Phrase and Status are gorm-indexed and always written to JSON (no omitempty).
Phrase pipeline.Phrase `gorm:"index" json:"phrase"`
Status TaskStatus `gorm:"index" json:"status"`
Message string `json:"message,omitempty"`
// The timestamps below are gorm-indexed pointers; nil values are omitted
// from JSON.
Created *time.Time `gorm:"index" json:"created,omitempty"`
Updated *time.Time `gorm:"index" json:"updated,omitempty"`
LastFetch *time.Time `gorm:"index" json:"last_fetch,omitempty"`
LastCheck *time.Time `gorm:"index" json:"last_check,omitempty"`
NextCheck *time.Time `gorm:"index" json:"next_check,omitempty"`
// The Snapshot* fields share names with Snapshot's Version, ID, Hash,
// SimHash and Created fields.
// NOTE(review): presumably copied from the task's most recent snapshot — confirm.
SnapshotVersion int `json:"snapshot_version,omitempty"`
SnapshotID string `json:"snapshot_id,omitempty"`
SnapshotHash string `json:"snapshot_hash,omitempty"`
SnapshotSimHash string `json:"snapshot_simhash,omitempty"`
SnapshotCreated *time.Time `json:"snapshot_created,omitempty"`
}
func GetPendingNewFetchTasks ¶
func GetTaskByField ¶
type TaskSetting ¶ added in v0.9.0
// TaskSetting contains settings for a task.
type TaskSetting struct {
// EnabledJoints is a list of strings and carries no struct tags.
// NOTE(review): presumably the names of the pipeline joints to run — confirm.
EnabledJoints []string
}
TaskSetting contains settings for a task.
type TaskStatus ¶
// TaskStatus is the integer-coded state stored in a Task's Status field.
type TaskStatus int

// Task states, listed in ascending numeric order. Every value is written out
// explicitly so the numbers stay stable; note that no constant with value 1
// is declared in this listing.
const (
	TaskCreated     TaskStatus = 0
	TaskFailed      TaskStatus = 2
	TaskSuccess     TaskStatus = 3
	Task404         TaskStatus = 4
	TaskRedirected  TaskStatus = 5
	TaskTimeout     TaskStatus = 6
	TaskDuplicated  TaskStatus = 7
	TaskInterrupted TaskStatus = 8
)
Click to show internal directories.
Click to hide internal directories.