Documentation ¶
Index ¶
- func CreateSnapshot(snapshot *Snapshot) error
- func CreateTask(task *Task) error
- func DeleteTask(id string) error
- func IncrementDomainLinkCount(host string) error
- func UpdateTask(task *Task)
- type Domain
- type DomainSetting
- type IndexDocument
- type KV
- type LinkGroup
- type PageLink
- type Seed
- type Snapshot
- type Task
- type TaskStatus
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func CreateSnapshot ¶
func CreateTask ¶
func DeleteTask ¶
func UpdateTask ¶
func UpdateTask(task *Task)
Types ¶
type Domain ¶
// Domain is the stored record for a single host, keyed by Host.
type Domain struct {
// Host identifies the record: it is the storm id and the gorm primary key (unique, not null).
Host string `storm:"id,unique" json:"host,omitempty" gorm:"not null;unique;primary_key"`
// LinksCount is a link counter for the host.
// NOTE(review): presumably the value IncrementDomainLinkCount updates; confirm against its implementation.
LinksCount int64 `json:"links_count,omitempty"`
Favicon string `json:"favicon,omitempty"`
// Settings carries the per-domain settings; it is tagged storm:"inline" and omitted from JSON when nil.
Settings *DomainSetting `storm:"inline" json:"settings,omitempty"`
// CreateTime and UpdateTime are storm-indexed and serialized as "created" and "updated".
CreateTime *time.Time `storm:"index" json:"created,omitempty"`
UpdateTime *time.Time `storm:"index" json:"updated,omitempty"`
}
func CreateDomain ¶
type DomainSetting ¶
// DomainSetting holds per-domain settings referenced by Domain.Settings.
// It currently declares no fields.
type DomainSetting struct {
}
type IndexDocument ¶
type Seed ¶
// Seed describes a URL to be crawled together with its crawl position.
// It is embedded in Task.
type Seed struct {
	// Url is the seed URL. It may not have been cleaned and may lack the
	// domain part; in that case Reference supplies the information needed to
	// build the complete URL.
	//
	// The gorm tag declares the column as a non-null varchar(500). The type
	// must follow "type:" directly; the previous "type:not null;varchar(500)"
	// made the column type "not null" and dropped the varchar declaration.
	Url string `storm:"index" json:"url,omitempty" gorm:"type:varchar(500);not null"`
	// Reference is the URL used to resolve an incomplete Url.
	Reference string `json:"reference_url,omitempty"`
	// Depth and Breadth are storm-indexed crawl position counters.
	Depth   int `storm:"index" json:"depth,omitempty"`
	Breadth int `storm:"index" json:"breadth,omitempty"`
}
func TaskSeedFromBytes ¶
func (Seed) MustGetBytes ¶
type Snapshot ¶
// Snapshot describes one captured version of a page.
//
// Fields tagged gorm:"-" are skipped by gorm, and fields tagged json:"-" are
// left out of the JSON encoding, so only a subset of the fields is persisted
// through each path.
type Snapshot struct {
// ID is the gorm primary key (unique, not null).
ID string `json:"id,omitempty" gorm:"not null;unique;primary_key"`
Version int `json:"version,omitempty"`
Url string `json:"url,omitempty"`
// TaskID links the snapshot to a task (presumably Task.ID; confirm against callers).
TaskID string `json:"task_id,omitempty"`
Path string `json:"path,omitempty" gorm:"-"` // path of this file
File string `json:"file,omitempty" gorm:"-"` // filename of this page
// StatusCode, Payload, Headers, Metadata and Parameters are excluded from both JSON and gorm.
StatusCode int `json:"-" gorm:"-"`
Payload []byte `json:"-" gorm:"-"`
Size uint64 `json:"size,omitempty"`
Headers map[string][]string `json:"-" gorm:"-"`
Metadata *map[string]interface{} `json:"-" gorm:"-"`
Parameters []KV `json:"-" gorm:"-"`
Language string `json:"lang,omitempty" gorm:"-"`
Title string `json:"title,omitempty"`
Summary string `json:"summary,omitempty" gorm:"-"`
Text string `json:"text,omitempty" gorm:"-"`
ContentType string `json:"content_type,omitempty"`
Tags []string `json:"tags,omitempty" gorm:"-"`
Links LinkGroup `json:"links,omitempty" gorm:"-"`
// Images groups image links into internal and external lists.
Images struct {
Internal []PageLink `json:"internal,omitempty"`
External []PageLink `json:"external,omitempty"`
} `json:"images,omitempty" gorm:"-"`
// H1 through H5, Bold and Italic hold text collected per markup kind
// (presumably heading and emphasis elements of the page; confirm against the extractor).
H1 []string `json:"h1,omitempty" gorm:"-"`
H2 []string `json:"h2,omitempty" gorm:"-"`
H3 []string `json:"h3,omitempty" gorm:"-"`
H4 []string `json:"h4,omitempty" gorm:"-"`
H5 []string `json:"h5,omitempty" gorm:"-"`
Bold []string `json:"bold,omitempty" gorm:"-"`
Italic []string `json:"italic,omitempty" gorm:"-"`
Classifications []string `json:"classifications,omitempty" gorm:"-"`
EnrichedFeatures *map[string]interface{} `json:"enriched_features,omitempty" gorm:"-"`
// Hash and SimHash are stored as strings; how they are computed is not shown here.
Hash string `json:"hash,omitempty"`
SimHash string `json:"sim_hash,omitempty"`
CreateTime *time.Time `json:"created,omitempty"`
}
type Task ¶
// Task is a crawl task. It embeds Seed and adds identity, status, scheduling
// timestamps and a summary of the task's last snapshot.
type Task struct {
Seed
// ID is the gorm primary key (unique, not null).
ID string `gorm:"not null;unique;primary_key" json:"id"`
// Host is gorm-indexed and excluded from JSON.
Host string `gorm:"index" json:"-"`
Schema string `json:"schema,omitempty"`
OriginalUrl string `json:"original_url,omitempty"`
Phrase pipeline.Phrase `gorm:"index" json:"phrase"`
Status TaskStatus `gorm:"index" json:"status"`
// Message is excluded from JSON.
Message string `json:"-"`
CreateTime *time.Time `gorm:"index" json:"created,omitempty"`
UpdateTime *time.Time `gorm:"index" json:"updated,omitempty"`
// The fetch/check timestamps are gorm-indexed and always emitted in JSON (no omitempty).
LastFetchTime *time.Time `gorm:"index" json:"last_fetch"`
LastCheckTime *time.Time `gorm:"index" json:"last_check"`
NextCheckTime *time.Time `gorm:"index" json:"next_check"`
SnapshotVersion int `json:"snapshot_version"`
SnapshotID string `json:"snapshot_id"` // last snapshot's ID
SnapshotHash string `json:"snapshot_hash"` // last snapshot's hash
SnapshotSimHash string `json:"snapshot_simhash"` // last snapshot's simhash
SnapshotCreateTime *time.Time `json:"snapshot_created"` // last snapshot's creation time
}
func GetPendingNewFetchTasks ¶
func GetTaskByField ¶
type TaskStatus ¶
// TaskStatus is the integer status code carried by Task.Status.
type TaskStatus int

// Task status codes, listed in numeric order. Every value is spelled out
// explicitly, so it does not depend on declaration order. No status with
// value 1 is declared here.
const (
	TaskCreated          TaskStatus = 0
	TaskFetchFailed      TaskStatus = 2
	TaskFetchSuccess     TaskStatus = 3
	Task404Ignore        TaskStatus = 4
	TaskRedirectedIgnore TaskStatus = 5
	TaskFetchTimeout     TaskStatus = 6
)
Click to show internal directories.
Click to hide internal directories.