model

package
v0.9.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 20, 2017 License: Apache-2.0 Imports: 10 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func CreateSnapshot

func CreateSnapshot(snapshot *Snapshot) error

func CreateTask

func CreateTask(task *Task) error

func DeleteSnapshot added in v0.9.0

func DeleteSnapshot(snapshot *Snapshot) error

func DeleteTask

func DeleteTask(id string) error

func GetTaskStatusText added in v0.9.0

func GetTaskStatusText(status TaskStatus) string

func IncrementDomainLinkCount

func IncrementDomainLinkCount(host string) error

IncrementDomainLinkCount updates the link count of the given domain.

func UpdateTask

func UpdateTask(task *Task)

Types

type Aggregation added in v0.9.0

// Aggregation holds a list of buckets.
// NOTE(review): presumably the result of a search-engine aggregation — confirm against callers.
type Aggregation struct {
	// Buckets is the list of key/count pairs; left out of the JSON output when empty.
	Buckets []Bucket `json:"buckets,omitempty"`
}

type Bucket added in v0.9.0

// Bucket pairs a key with a document count.
type Bucket struct {
	// Key identifies the bucket; serialized as "key".
	Key      string `json:"key,omitempty"`
	// DocCount is serialized as "doc_count". Because of omitempty a count of
	// zero is left out of the JSON output entirely.
	DocCount int    `json:"doc_count,omitempty"`
}

type Domain

// Domain is the record kept for a single domain host.
type Domain struct {
	// Host is the identifier of the record: it is tagged as the unique id for
	// storm and as the non-null, unique primary key for gorm.
	Host       string         `storm:"id,unique" json:"host,omitempty" gorm:"not null;unique;primary_key" index:"id"`
	// LinksCount is the domain's link count (see IncrementDomainLinkCount).
	LinksCount int64          `json:"links_count,omitempty"`
	// Favicon is serialized as "favicon".
	// NOTE(review): whether this holds a URL or encoded image data is not visible here — confirm.
	Favicon    string         `json:"favicon,omitempty"`
	// Settings points to the per-domain settings; tagged storm:"inline".
	Settings   *DomainSetting `storm:"inline" json:"settings,omitempty"`
	// Created and Updated are optional timestamps, both indexed by storm.
	Created    *time.Time     `storm:"index" json:"created,omitempty"`
	Updated    *time.Time     `storm:"index" json:"updated,omitempty"`
}

Domain is the struct that describes a domain host.

func CreateDomain

func CreateDomain(host string) Domain

CreateDomain creates a domain.

func GetDomain

func GetDomain(domain string) (Domain, error)

GetDomain returns a single domain.

func GetDomainList

func GetDomainList(from, size int, domain string) (int, []Domain, error)

GetDomainList returns a list of domains.

type DomainSetting

// DomainSetting holds the settings for a specific domain.
// It declares no fields in this version.
type DomainSetting struct {
}

DomainSetting holds the settings for a specific domain.

type IndexDocument

// IndexDocument is used to construct a document for indexing.
type IndexDocument struct {
	// Index is serialized as "_index".
	Index     string                   `json:"_index,omitempty"`
	// Id is serialized as "_id".
	Id        string                   `json:"_id,omitempty"`
	// Source carries the free-form document body; serialized as "_source".
	Source    map[string]interface{}   `json:"_source,omitempty"`
	// Highlight maps a key to a list of values; serialized as "highlight".
	// NOTE(review): presumably field name -> highlighted fragments — confirm.
	Highlight map[string][]interface{} `json:"highlight,omitempty"`
}

IndexDocument is used to construct a document for indexing.

type KV

// KV associates a key with a list of string values.
type KV struct {
	// Key is serialized as "key".
	Key   string   `json:"key,omitempty"`
	// Value holds every value recorded for Key; tagged storm:"inline".
	Value []string `storm:"inline" json:"value,omitempty"`
}

type LinkGroup

// LinkGroup splits a set of page links into internal and external lists.
type LinkGroup struct {
	// Internal is serialized as "internal".
	// NOTE(review): presumably links that stay on the same host — confirm.
	Internal []PageLink `json:"internal,omitempty"`
	// External is serialized as "external".
	// NOTE(review): presumably links that point to other hosts — confirm.
	External []PageLink `json:"external,omitempty"`
}
// PageLink is a single link: a URL together with a label.
type PageLink struct {
	// Url is serialized as "url".
	Url   string `json:"url,omitempty"`
	// Label is serialized as "label".
	// NOTE(review): presumably the anchor text or a similar caption — confirm.
	Label string `json:"label,omitempty"`
}

type Seed

// Seed describes a URL to work on, together with the reference URL it came
// from and its depth and breadth values.
type Seed struct {
	// Url is the seed URL. It may not have been cleaned and may lack the
	// domain part, in which case Reference is needed to recover the complete
	// URL. Indexed by storm; stored by gorm as varchar(500).
	Url       string `storm:"index" json:"url,omitempty" gorm:"type:varchar(500)"`
	// Reference is serialized as "reference_url".
	Reference string `json:"reference_url,omitempty"`
	// Depth is indexed by storm and always serialized (no omitempty).
	Depth     int    `storm:"index" json:"depth"`
	// Breadth is indexed by storm and always serialized (no omitempty).
	Breadth   int    `storm:"index" json:"breadth"`
}

func NewTaskSeed

func NewTaskSeed(url, ref string, depth int, breadth int) Seed

func TaskSeedFromBytes

func TaskSeedFromBytes(b []byte) Seed

func (Seed) Get

func (this Seed) Get(url string) Seed

func (Seed) GetBytes

func (this Seed) GetBytes() ([]byte, error)

func (Seed) MustGetBytes

func (this Seed) MustGetBytes() []byte

type Snapshot

// Snapshot holds the data captured for one URL. Fields tagged gorm:"-" are
// not persisted by gorm, and fields tagged json:"-" are left out of the JSON
// representation.
type Snapshot struct {
	// ID is the non-null, unique primary key for gorm.
	ID      string `json:"id,omitempty" gorm:"not null;unique;primary_key" index:"id"`
	Version int    `json:"version,omitempty"`
	Url     string `json:"url,omitempty"`
	// TaskID is serialized as "task_id".
	// NOTE(review): presumably the ID of the Task that produced this snapshot — confirm.
	TaskID  string `json:"task_id,omitempty"`
	Path    string `json:"path,omitempty"  gorm:"-"` // path of this file
	File    string `json:"file,omitempty"  gorm:"-"` // filename of this page

	// StatusCode and Payload are excluded from both JSON and gorm.
	StatusCode int    `json:"-" gorm:"-"`
	Payload    []byte `json:"-" gorm:"-"`
	Size       uint64 `json:"size,omitempty"`

	// Headers, Metadata and Parameters are excluded from both JSON and gorm.
	Headers    map[string][]string     `json:"-" gorm:"-"`
	Metadata   *map[string]interface{} `json:"-" gorm:"-"`
	Parameters []KV                    `json:"-" gorm:"-"`

	// Language is serialized as "lang"; not persisted by gorm.
	Language string `json:"lang,omitempty" gorm:"-"`

	// Title and ContentType are persisted by gorm; Summary and Text are not.
	Title       string `json:"title,omitempty"`
	Summary     string `json:"summary,omitempty" gorm:"-"`
	Text        string `json:"text,omitempty" gorm:"-"`
	ContentType string `json:"content_type,omitempty"`

	Tags []string `json:"tags,omitempty" gorm:"-"`

	// Links groups the page links into internal and external lists.
	Links LinkGroup `json:"links,omitempty" gorm:"-"`

	// Images has the same internal/external shape as LinkGroup, declared as
	// an anonymous struct.
	Images struct {
		Internal []PageLink `json:"internal,omitempty"`
		External []PageLink `json:"external,omitempty"`
	} `json:"images,omitempty" gorm:"-"`

	// H1-H5, Bold and Italic are string lists serialized under the matching
	// lower-case keys; none of them is persisted by gorm.
	// NOTE(review): presumably text extracted from the corresponding HTML elements — confirm.
	H1     []string `json:"h1,omitempty" gorm:"-"`
	H2     []string `json:"h2,omitempty" gorm:"-"`
	H3     []string `json:"h3,omitempty" gorm:"-"`
	H4     []string `json:"h4,omitempty" gorm:"-"`
	H5     []string `json:"h5,omitempty" gorm:"-"`
	Bold   []string `json:"bold,omitempty" gorm:"-"`
	Italic []string `json:"italic,omitempty" gorm:"-"`

	Classifications  []string                `json:"classifications,omitempty" gorm:"-"`
	EnrichedFeatures *map[string]interface{} `json:"enriched_features,omitempty" gorm:"-"`

	// Hash and SimHash are serialized as "hash" and "sim_hash".
	// NOTE(review): the hashing algorithms are not visible here — confirm.
	Hash    string `json:"hash,omitempty"`
	SimHash string `json:"sim_hash,omitempty"`

	// Created is an optional timestamp.
	Created *time.Time `json:"created,omitempty"`
}

func GetSnapshot added in v0.9.0

func GetSnapshot(id string) (Snapshot, error)

func GetSnapshotByField added in v0.9.0

func GetSnapshotByField(k, v string) ([]Snapshot, error)

func GetSnapshotList added in v0.9.0

func GetSnapshotList(from, size int, taskId string) (int, []Snapshot, error)

type Task

// Task embeds a Seed and adds identity, status, timestamps and a summary of
// a snapshot (the Snapshot* fields).
type Task struct {
	// Seed is embedded, so its Url, Reference, Depth and Breadth fields are
	// promoted onto Task.
	Seed
	// ID is the non-null, unique primary key for gorm.
	ID          string          `gorm:"not null;unique;primary_key" json:"id" index:"id"`
	// Host is indexed by gorm.
	Host        string          `gorm:"index" json:"host"`
	Schema      string          `json:"schema,omitempty"`
	OriginalUrl string          `json:"original_url,omitempty"`
	// Phrase uses a type from the pipeline package; indexed by gorm.
	Phrase      pipeline.Phrase `gorm:"index" json:"phrase"`
	// Status is one of the TaskStatus values; indexed by gorm.
	Status      TaskStatus      `gorm:"index" json:"status"`
	Message     string          `json:"message,omitempty"`
	// The timestamps below are optional (nil when unset) and indexed by gorm.
	Created     *time.Time      `gorm:"index" json:"created,omitempty"`
	Updated     *time.Time      `gorm:"index" json:"updated,omitempty"`
	LastFetch   *time.Time      `gorm:"index" json:"last_fetch,omitempty"`
	LastCheck   *time.Time      `gorm:"index" json:"last_check,omitempty"`
	NextCheck   *time.Time      `gorm:"index" json:"next_check,omitempty"`

	// The Snapshot* fields mirror the Version, ID, Hash, SimHash and Created
	// fields of Snapshot.
	// NOTE(review): presumably they describe the task's most recent snapshot — confirm.
	SnapshotVersion int        `json:"snapshot_version,omitempty"`
	SnapshotID      string     `json:"snapshot_id,omitempty"`
	SnapshotHash    string     `json:"snapshot_hash,omitempty"`
	SnapshotSimHash string     `json:"snapshot_simhash,omitempty"`
	SnapshotCreated *time.Time `json:"snapshot_created,omitempty"`
}

func GetPendingNewFetchTasks

func GetPendingNewFetchTasks() (int, []Task, error)

func GetPendingUpdateFetchTasks

func GetPendingUpdateFetchTasks(offset *time.Time) (int, []Task, error)

func GetTask

func GetTask(id string) (Task, error)

func GetTaskByField

func GetTaskByField(k, v string) ([]Task, error)

func GetTaskList

func GetTaskList(from, size int, domain string) (int, []Task, error)

type TaskSetting added in v0.9.0

// TaskSetting contains the settings for a task.
type TaskSetting struct {
	// EnabledJoints is a list of joint names.
	// NOTE(review): presumably the pipeline joints enabled for the task — confirm.
	EnabledJoints []string
}

TaskSetting contains the settings for a task.

type TaskStatus

// TaskStatus is the integer status code carried by Task.Status.
type TaskStatus int

// Task status values, listed in ascending numeric order.
// NOTE(review): no exported constant with value 1 appears in this listing —
// confirm whether one is defined elsewhere.
const (
	TaskCreated     TaskStatus = 0
	TaskFailed      TaskStatus = 2
	TaskSuccess     TaskStatus = 3
	Task404         TaskStatus = 4
	TaskRedirected  TaskStatus = 5
	TaskTimeout     TaskStatus = 6
	TaskDuplicated  TaskStatus = 7
	TaskInterrupted TaskStatus = 8
)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL