model

package
v0.10.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 20, 2018 License: Apache-2.0 Imports: 15 Imported by: 0

Documentation

Index

Constants

View Source
// Role names. GetPermissionsByRole takes a role string; presumably one of
// these values — TODO confirm against its implementation.
const (
	// ROLE_GUEST is the role name "guest".
	ROLE_GUEST string = "guest"
	// ROLE_ADMIN is the role name "admin".
	ROLE_ADMIN string = "admin"
)
View Source
// Permission identifiers, grouped by the role named in the comment above
// each entry.
const (
	// GUEST: permission identifier "view_snapshot".
	PERMISSION_SNAPSHOT_VIEW string = "view_snapshot"

	// ADMIN: permission identifier "admin_minimal".
	PERMISSION_ADMIN_MINIMAL string = "admin_minimal"
)
View Source
// PipelineConfigBucket is the bucket name "PipelineConfig"; presumably the
// storage bucket that holds PipelineConfig records — TODO confirm.
const PipelineConfigBucket = "PipelineConfig"
View Source
// Task status codes (plain ints). Values declared in this listing are 0 and
// 2 through 9; no constant with value 1 appears here. Presumably these are
// the values stored in Task.Status and rendered by GetTaskStatusText —
// TODO confirm.

// Task404 is status code 4; presumably marks a task whose URL was not
// found (HTTP 404) — TODO confirm.
const Task404 int = 4
View Source
// TaskCreated is status code 0, which is also the zero value of int.
const TaskCreated int = 0
View Source
// TaskDuplicated is status code 7.
const TaskDuplicated int = 7
View Source
// TaskFailed is status code 2.
const TaskFailed int = 2
View Source
// TaskInterrupted is status code 8.
const TaskInterrupted int = 8
View Source
// TaskPendingFetch is status code 9.
const TaskPendingFetch int = 9
View Source
// TaskRedirected is status code 5.
const TaskRedirected int = 5
View Source
// TaskSuccess is status code 3.
const TaskSuccess int = 3
View Source
// TaskTimeout is status code 6.
const TaskTimeout int = 6

Variables

This section is empty.

Functions

func CreateHostConfig added in v0.10.0

func CreateHostConfig(config *HostConfig) error

func CreatePipelineConfig added in v0.10.0

func CreatePipelineConfig(cfg *PipelineConfig) error

func CreateProject added in v0.10.0

func CreateProject(project *Project) error

func CreateSnapshot

func CreateSnapshot(snapshot *Snapshot) error

func CreateTask

func CreateTask(task *Task) error

func DeleteHostConfig added in v0.10.0

func DeleteHostConfig(id string) error

func DeletePipelineConfig added in v0.10.0

func DeletePipelineConfig(id string) error

func DeleteProject added in v0.10.0

func DeleteProject(id string) error

func DeleteSnapshot added in v0.9.0

func DeleteSnapshot(snapshot *Snapshot) error

func DeleteTask

func DeleteTask(id string) error

func GetAllRegisteredJoints added in v0.10.0

func GetAllRegisteredJoints() map[string]interface{}

func GetHostStatus added in v0.10.0

func GetHostStatus(status int) (error, map[string]interface{})

func GetPermissionsByRole added in v0.10.0

func GetPermissionsByRole(role string) (*hashset.Set, error)

func GetTaskStatus added in v0.10.0

func GetTaskStatus(host string) (error, map[string]interface{})

func GetTaskStatusText added in v0.9.0

func GetTaskStatusText(status int) string

func RegisterPipeJoint added in v0.10.0

func RegisterPipeJoint(joint Joint)

func RegisterPipeJointWithName added in v0.10.0

func RegisterPipeJointWithName(jointName string, joint Joint)

func UpdateHostConfig added in v0.10.0

func UpdateHostConfig(config *HostConfig) error

func UpdatePipelineConfig added in v0.10.0

func UpdatePipelineConfig(id string, cfg *PipelineConfig) error

func UpdateProject added in v0.10.0

func UpdateProject(project *Project) error

func UpdateTask

func UpdateTask(task *Task) error

Types

type Aggregation added in v0.9.0

// Aggregation wraps a list of Bucket values, serialized under the JSON key
// "buckets".
type Aggregation struct {
	// Buckets is omitted from the JSON output when empty.
	Buckets []Bucket `json:"buckets,omitempty"`
}

type Bucket added in v0.9.0

// Bucket is one entry of an Aggregation: a key and a document count.
type Bucket struct {
	// Key is the bucket key; omitted from JSON when empty.
	Key      string `json:"key,omitempty"`
	// DocCount is serialized as "doc_count"; because of omitempty a count of
	// 0 is dropped from the JSON output.
	DocCount int    `json:"doc_count,omitempty"`
}

type Context added in v0.10.0

// Context is the value passed to Joint.Process and returned by Pipeline.Run.
// It embeds Parameters, so the Parameters accessors (Get, Set, Has, ...) are
// available directly on a Context.
type Context struct {
	// Parameters is embedded; its methods are promoted onto Context.
	Parameters

	// SequenceID is serialized under the JSON key "sequence".
	SequenceID   int64       `json:"sequence"`
	// Simulate is serialized as "simulate".
	// NOTE(review): presumably a dry-run flag — confirm against the callers.
	Simulate     bool        `json:"simulate"`
	// IgnoreBroken is serialized as "ignore_broken".
	IgnoreBroken bool        `json:"ignore_broken"`
	// Payload is an arbitrary value that is never serialized to JSON
	// (tag "-").
	Payload      interface{} `json:"-"`
	// contains filtered or unexported fields
}

func (*Context) End added in v0.10.0

func (context *Context) End(msg interface{})

End breaks all pipelines, but the end phase is not included.

func (*Context) Exit added in v0.10.0

func (context *Context) Exit(msg interface{})

Exit tells pipeline to exit

func (*Context) IsEnd added in v0.10.0

func (context *Context) IsEnd() bool

IsEnd indicates whether the pipe process has ended; ended means no more processes will be executed.

func (*Context) IsExit added in v0.10.0

func (context *Context) IsExit() bool

IsExit means all pipelines will be broken and execution jumps outside; even the end phase will not be executed.

type Host added in v0.10.0

// Host describes a single host record. The Host field carries the gorm
// primary-key tag, so the host name itself is the record identifier.
type Host struct {
	// Host is the host name; tagged not null, unique and primary key for
	// gorm, and as the "id" index.
	Host        string        `json:"host,omitempty" gorm:"not null;unique;primary_key" index:"id"`
	// Favicon is omitted from JSON when empty.
	Favicon     string        `json:"favicon,omitempty"`
	// Enabled is always serialized (no omitempty).
	Enabled     bool          `json:"enabled"`
	// HostConfigs is a pointer to a slice; a nil pointer is omitted from JSON.
	HostConfigs *[]HostConfig `json:"host_configs,omitempty"`
	// Created and Updated are pointers, so nil timestamps are omitted from
	// JSON.
	Created     *time.Time    `json:"created,omitempty"`
	Updated     *time.Time    `json:"updated,omitempty"`
}

Host is host struct

func CreateHost added in v0.10.0

func CreateHost(host string) Host

CreateHost creates a domain host.

func GetHost added in v0.10.0

func GetHost(host string) (Host, error)

GetHost returns a single host.

func GetHostList added in v0.10.0

func GetHostList(from, size int, host string) (int, []Host, error)

GetHostList returns the host list.

type HostConfig added in v0.10.0

// HostConfig is a per-host configuration record. It is referenced from
// Host.HostConfigs and Task.HostConfig, and looked up by runner, host and
// URL via the GetHostConfig* functions.
type HostConfig struct {
	// ID is tagged as the gorm primary key and as the "id" index.
	ID         string `json:"id,omitempty" gorm:"not null;unique;primary_key" index:"id"`
	// Host, UrlPattern, Runner and SortOrder all carry the gorm "index" tag.
	Host       string `gorm:"index" json:"host"`
	UrlPattern string `gorm:"index" json:"url_pattern"`
	Runner     string `gorm:"index" json:"runner"`
	SortOrder  int    `gorm:"index" json:"sort_order"`

	// PipelineID presumably refers to a PipelineConfig.ID — TODO confirm.
	PipelineID string `gorm:"index" json:"pipeline_id"`
	// Cookies is omitted from JSON when empty.
	Cookies    string `json:"cookies,omitempty"`

	// NOTE(review): omitempty has no effect on non-pointer time.Time values
	// in encoding/json, so these timestamps are always serialized.
	Created time.Time `gorm:"index" json:"created,omitempty"`
	Updated time.Time `gorm:"index" json:"updated,omitempty"`
}

func GetHostConfig added in v0.10.0

func GetHostConfig(runner, host string) []HostConfig

func GetHostConfigByHostAndUrl added in v0.10.0

func GetHostConfigByHostAndUrl(runner, host, url string) (*HostConfig, error)

func GetHostConfigByID added in v0.10.0

func GetHostConfigByID(id string) (HostConfig, error)

func GetHostConfigList added in v0.10.0

func GetHostConfigList(from, size int, host string) (int, []HostConfig, error)

type IndexDocument

// IndexDocument is a document envelope whose JSON keys are "_index", "_id",
// "_source" and "highlight". Every field is omitted from JSON when empty.
type IndexDocument struct {
	// Index is serialized as "_index".
	Index     string                   `json:"_index,omitempty"`
	// ID is serialized as "_id".
	ID        string                   `json:"_id,omitempty"`
	// Source holds the document body as a generic map, serialized as
	// "_source".
	Source    map[string]interface{}   `json:"_source,omitempty"`
	// Highlight maps a string key to a list of values.
	// NOTE(review): presumably field name -> highlighted fragments — confirm.
	Highlight map[string][]interface{} `json:"highlight,omitempty"`
}

IndexDocument is used to construct an indexing document.

type Joint added in v0.10.0

// Joint is a single step that can be attached to a Pipeline through
// Pipeline.Start, Pipeline.Join or Pipeline.End, and registered with
// RegisterPipeJoint / RegisterPipeJointWithName.
type Joint interface {
	// Name returns the joint's name as a string.
	Name() string
	// Process runs the joint against the given Context and reports failure
	// through the returned error.
	Process(s *Context) error
}

func GetJointInstance added in v0.10.0

func GetJointInstance(cfg *JointConfig) Joint

type JointConfig added in v0.10.0

// JointConfig is the serializable description of one joint. Each field has
// both a json tag and a config tag with the same key. GetJointInstance takes
// a *JointConfig and returns a Joint.
type JointConfig struct {
	JointName  string                 `json:"joint" config:"joint"`                     //the joint name
	Parameters map[string]interface{} `json:"parameters,omitempty" config:"parameters"` //kv parameters for this joint
	// Enabled is always serialized (no omitempty).
	Enabled    bool                   `json:"enabled" config:"enabled"`
}

JointConfig holds the configuration for each joint.

type KV

// KV pairs one string key with a list of string values. It is the element
// type of Snapshot.Parameters.
type KV struct {
	// Key is omitted from JSON when empty.
	Key   string   `json:"key,omitempty"`
	// Value carries the storm "inline" tag and is omitted from JSON when
	// empty.
	Value []string `storm:"inline" json:"value,omitempty"`
}

type LinkGroup

// LinkGroup splits page links into two lists, "internal" and "external".
// It is the type of Snapshot.Links.
type LinkGroup struct {
	// Internal is omitted from JSON when empty.
	Internal []PageLink `json:"internal,omitempty"`
	// External is omitted from JSON when empty.
	External []PageLink `json:"external,omitempty"`
}
// PageLink is a single link: a URL plus a label. Both fields are omitted
// from JSON when empty.
type PageLink struct {
	Url   string `json:"url,omitempty"`
	Label string `json:"label,omitempty"`
}

type ParaKey added in v0.10.0

// ParaKey is the string-based key type accepted by the Parameters accessors
// (Get, Set, Has, GetString, MustGetInt, ...).
type ParaKey string
// Context keys of type ParaKey.
const (
	// CONTEXT_SNAPSHOT is the key "SNAPSHOT"; presumably stores a Snapshot in
	// the pipeline context — TODO confirm.
	CONTEXT_SNAPSHOT   ParaKey = "SNAPSHOT"
	// CONTEXT_PAGE_LINKS is the key "PAGE_LINKS".
	CONTEXT_PAGE_LINKS ParaKey = "PAGE_LINKS"
)

Common pipeline context keys

// Context keys of type ParaKey for task-related values. Each key string is
// "GOPA_TASK_" followed by the name of the matching Task field (ID, Url as
// "URL", Reference, Depth, ...). One further key covers the snapshot content
// type.
const (
	CONTEXT_TASK_ID               ParaKey = "GOPA_TASK_ID"
	CONTEXT_TASK_URL              ParaKey = "GOPA_TASK_URL"
	CONTEXT_TASK_Reference        ParaKey = "GOPA_TASK_Reference"
	CONTEXT_TASK_Depth            ParaKey = "GOPA_TASK_Depth"
	CONTEXT_TASK_Breadth          ParaKey = "GOPA_TASK_Breadth"
	CONTEXT_TASK_Host             ParaKey = "GOPA_TASK_Host"
	CONTEXT_TASK_Schema           ParaKey = "GOPA_TASK_Schema"
	CONTEXT_TASK_OriginalUrl      ParaKey = "GOPA_TASK_OriginalUrl"
	CONTEXT_TASK_Status           ParaKey = "GOPA_TASK_Status"
	CONTEXT_TASK_Message          ParaKey = "GOPA_TASK_Message"
	CONTEXT_TASK_Created          ParaKey = "GOPA_TASK_Created"
	CONTEXT_TASK_Updated          ParaKey = "GOPA_TASK_Updated"
	CONTEXT_TASK_LastFetch        ParaKey = "GOPA_TASK_LastFetch"
	CONTEXT_TASK_LastCheck        ParaKey = "GOPA_TASK_LastCheck"
	CONTEXT_TASK_NextCheck        ParaKey = "GOPA_TASK_NextCheck"
	CONTEXT_TASK_SnapshotID       ParaKey = "GOPA_TASK_SnapshotID"
	CONTEXT_TASK_SnapshotSimHash  ParaKey = "GOPA_TASK_SnapshotSimHash"
	CONTEXT_TASK_SnapshotHash     ParaKey = "GOPA_TASK_SnapshotHash"
	CONTEXT_TASK_SnapshotCreated  ParaKey = "GOPA_TASK_SnapshotCreated"
	CONTEXT_TASK_SnapshotVersion  ParaKey = "GOPA_TASK_SnapshotVersion"
	CONTEXT_TASK_LastScreenshotID ParaKey = "GOPA_TASK_LastScreenshotID"
	CONTEXT_TASK_PipelineConfigID ParaKey = "GOPA_TASK_PipelineConfigID"
	CONTEXT_TASK_Cookies          ParaKey = "GOPA_TASK_Cookies"

	// CONTEXT_SNAPSHOT_ContentType is the only snapshot-prefixed key in this
	// block.
	CONTEXT_SNAPSHOT_ContentType ParaKey = "GOPA_SNAPSHOT_ContentType"
)

type Parameters added in v0.10.0

// Parameters is a key/value container accessed through ParaKey-typed
// methods (Get, Set, Has, GetString, MustGetInt, ...). It is embedded in
// Context.
type Parameters struct {
	// Data is the exported map, serialized as "data" and omitted from JSON
	// when empty.
	// NOTE(review): presumably the backing store for Get/Set — confirm.
	Data map[string]interface{} `json:"data,omitempty"`
	// contains filtered or unexported fields
}

func (*Parameters) Get added in v0.10.0

func (para *Parameters) Get(key ParaKey) interface{}

func (*Parameters) GetBool added in v0.10.0

func (para *Parameters) GetBool(key ParaKey, defaultV bool) bool

func (*Parameters) GetInt added in v0.10.0

func (para *Parameters) GetInt(key ParaKey, defaultV int) (int, bool)

func (*Parameters) GetInt64 added in v0.10.0

func (para *Parameters) GetInt64(key ParaKey, defaultV int64) (int64, bool)

func (*Parameters) GetInt64OrDefault added in v0.10.0

func (para *Parameters) GetInt64OrDefault(key ParaKey, defaultV int64) int64

func (*Parameters) GetIntOrDefault added in v0.10.0

func (para *Parameters) GetIntOrDefault(key ParaKey, defaultV int) int

func (*Parameters) GetMap added in v0.10.0

func (para *Parameters) GetMap(key ParaKey) (map[string]interface{}, bool)

func (*Parameters) GetOrDefault added in v0.10.0

func (para *Parameters) GetOrDefault(key ParaKey, val interface{}) interface{}

func (*Parameters) GetString added in v0.10.0

func (para *Parameters) GetString(key ParaKey) (string, bool)

func (*Parameters) GetStringArray added in v0.10.0

func (para *Parameters) GetStringArray(key ParaKey) ([]string, bool)

GetStringArray returns an array whose items are of type string.

func (*Parameters) GetStringOrDefault added in v0.10.0

func (para *Parameters) GetStringOrDefault(key ParaKey, val string) string

func (*Parameters) GetTime added in v0.10.0

func (para *Parameters) GetTime(key ParaKey) (time.Time, bool)

func (*Parameters) Has added in v0.10.0

func (para *Parameters) Has(key ParaKey) bool

func (*Parameters) MustGet added in v0.10.0

func (para *Parameters) MustGet(key ParaKey) interface{}

func (*Parameters) MustGetBytes added in v0.10.0

func (para *Parameters) MustGetBytes(key ParaKey) []byte

func (*Parameters) MustGetInt added in v0.10.0

func (para *Parameters) MustGetInt(key ParaKey) int

MustGetInt returns 0 if the key was not found.

func (*Parameters) MustGetInt64 added in v0.10.0

func (para *Parameters) MustGetInt64(key ParaKey) int64

func (*Parameters) MustGetMap added in v0.10.0

func (para *Parameters) MustGetMap(key ParaKey) map[string]interface{}

func (*Parameters) MustGetString added in v0.10.0

func (para *Parameters) MustGetString(key ParaKey) string

func (*Parameters) MustGetTime added in v0.10.0

func (para *Parameters) MustGetTime(key ParaKey) time.Time

func (*Parameters) Set added in v0.10.0

func (para *Parameters) Set(key ParaKey, value interface{})

type Pipeline added in v0.10.0

// Pipeline has no exported fields. Instances come from NewPipeline or
// NewPipelineFromConfig; Context, Start, Join and End each return *Pipeline
// so calls can be chained, and Run returns the resulting *Context.
type Pipeline struct {
	// contains filtered or unexported fields
}

func NewPipeline added in v0.10.0

func NewPipeline(name string) *Pipeline

func NewPipelineFromConfig added in v0.10.0

func NewPipelineFromConfig(name string, config *PipelineConfig, context *Context) *Pipeline

func (*Pipeline) Context added in v0.10.0

func (pipe *Pipeline) Context(s *Context) *Pipeline

func (*Pipeline) End added in v0.10.0

func (pipe *Pipeline) End(s Joint) *Pipeline

func (*Pipeline) GetContext added in v0.10.0

func (pipe *Pipeline) GetContext() *Context

func (*Pipeline) GetCurrentJoint added in v0.10.0

func (pipe *Pipeline) GetCurrentJoint() string

func (*Pipeline) GetID added in v0.10.0

func (pipe *Pipeline) GetID() string

func (*Pipeline) Join added in v0.10.0

func (pipe *Pipeline) Join(s Joint) *Pipeline

func (*Pipeline) Run added in v0.10.0

func (pipe *Pipeline) Run() *Context

func (*Pipeline) Start added in v0.10.0

func (pipe *Pipeline) Start(s Joint) *Pipeline

type PipelineConfig added in v0.10.0

// PipelineConfig is the serializable description of a pipeline: one optional
// start joint, a list of process joints and one optional end joint. The
// joint fields and Tags carry gorm:"-", which excludes them from gorm
// persistence.
type PipelineConfig struct {
	// ID is tagged as the "id" index.
	ID            string         `json:"id,omitempty" index:"id"`
	// Name is serialized as "name" in both JSON and config.
	Name          string         `json:"name,omitempty" config:"name"`
	// StartJoint is serialized as "start"; nil is omitted from JSON.
	StartJoint    *JointConfig   `gorm:"-" json:"start,omitempty" config:"start"`
	// ProcessJoints is serialized as "process".
	ProcessJoints []*JointConfig `gorm:"-" json:"process,omitempty" config:"process"`
	// EndJoint is serialized as "end"; nil is omitted from JSON.
	EndJoint      *JointConfig   `gorm:"-" json:"end,omitempty" config:"end"`
	// Created and Updated are pointers, so nil timestamps are omitted from
	// JSON.
	Created       *time.Time     `json:"created,omitempty"`
	Updated       *time.Time     `json:"updated,omitempty"`
	// Tags is serialized as "tags".
	Tags          []string       `gorm:"-" json:"tags,omitempty" config:"tags"`
}

PipelineConfig is the config for each pipeline; a pipeline may have more than one joint.

func GetPipelineConfig added in v0.10.0

func GetPipelineConfig(id string) (*PipelineConfig, error)

func GetPipelineList added in v0.10.0

func GetPipelineList(from, size int) (int, []PipelineConfig, error)

type Project added in v0.10.0

// Project is a named, describable record with an enabled flag and
// timestamps.
type Project struct {
	// ID is tagged as the unique id for storm, the primary key for gorm, and
	// the "id" index.
	ID          string    `storm:"id,unique" json:"id,omitempty" gorm:"not null;unique;primary_key" index:"id"`
	Name        string    `json:"name,omitempty"`
	Description string    `json:"description,omitempty"`
	// Enabled is always serialized (no omitempty).
	Enabled     bool      `json:"enabled"`
	// NOTE(review): omitempty has no effect on non-pointer time.Time values
	// in encoding/json, so these timestamps are always serialized.
	Created     time.Time `json:"created,omitempty"`
	Updated     time.Time `json:"updated,omitempty"`
}

Project is a definition that includes a collection of Hosts.

func GetProject added in v0.10.0

func GetProject(id string) (Project, error)

func GetProjectList added in v0.10.0

func GetProjectList(from, size int) (int, []Project, error)

type Snapshot

// Snapshot is one captured version of a fetched URL together with everything
// extracted from it. Fields tagged gorm:"-" are excluded from gorm
// persistence; fields tagged json:"-" are excluded from JSON.
type Snapshot struct {
	// ID is tagged as the gorm primary key and as the "id" index.
	ID      string `json:"id,omitempty" gorm:"not null;unique;primary_key" index:"id"`
	Version int    `json:"version,omitempty"`
	Url     string `json:"url,omitempty"`
	// TaskID presumably refers to the owning Task.ID — TODO confirm.
	TaskID  string `json:"task_id,omitempty"`
	Path    string `json:"path,omitempty"  gorm:"-"` //path of this file
	File    string `json:"file,omitempty"  gorm:"-"` //filename of this page
	Ext     string `json:"ext,omitempty"  gorm:"-"`  //extension of filename

	// StatusCode and Payload are excluded from both JSON and gorm.
	// NOTE(review): presumably the HTTP status and raw response body —
	// confirm.
	StatusCode int    `json:"-" gorm:"-"`
	Payload    []byte `json:"-" gorm:"-"`
	Size       uint64 `json:"size,omitempty"`

	ScreenshotID string `json:"screenshot_id,omitempty"`

	// Headers, Metadata and Parameters are excluded from both JSON and gorm.
	Headers    map[string][]string     `json:"-" gorm:"-"`
	Metadata   *map[string]interface{} `json:"-" gorm:"-"`
	Parameters []KV                    `json:"-" gorm:"-"`

	// Language is serialized under the JSON key "lang".
	Language string `json:"lang,omitempty" gorm:"-"`

	Title       string `json:"title,omitempty"`
	Summary     string `json:"summary,omitempty" gorm:"-"`
	Text        string `json:"text,omitempty" gorm:"-"`
	ContentType string `json:"content_type,omitempty"`

	Tags []string `json:"tags,omitempty" gorm:"-"`

	// NOTE(review): omitempty has no effect on struct values in
	// encoding/json, so "links" and "images" are always emitted.
	Links LinkGroup `json:"links,omitempty" gorm:"-"`

	// Images uses an anonymous struct with the same two fields as LinkGroup.
	Images struct {
		Internal []PageLink `json:"internal,omitempty"`
		External []PageLink `json:"external,omitempty"`
	} `json:"images,omitempty" gorm:"-"`

	// Text lists keyed by "h1".."h5", "bold" and "italic"; presumably the
	// contents of the corresponding HTML elements — TODO confirm.
	H1     []string `json:"h1,omitempty" gorm:"-"`
	H2     []string `json:"h2,omitempty" gorm:"-"`
	H3     []string `json:"h3,omitempty" gorm:"-"`
	H4     []string `json:"h4,omitempty" gorm:"-"`
	H5     []string `json:"h5,omitempty" gorm:"-"`
	Bold   []string `json:"bold,omitempty" gorm:"-"`
	Italic []string `json:"italic,omitempty" gorm:"-"`

	Classifications  []string                `json:"classifications,omitempty" gorm:"-"`
	EnrichedFeatures *map[string]interface{} `json:"enriched_features,omitempty" gorm:"-"`

	Hash    string `json:"hash,omitempty"`
	SimHash string `json:"sim_hash,omitempty"`

	// NOTE(review): omitempty has no effect on a non-pointer time.Time, so
	// "created" is always serialized.
	Created time.Time `json:"created,omitempty"`
}

func GetSnapshot added in v0.9.0

func GetSnapshot(id string) (Snapshot, error)

func GetSnapshotByField added in v0.9.0

func GetSnapshotByField(k, v string) ([]Snapshot, error)

func GetSnapshotList added in v0.9.0

func GetSnapshotList(from, size int, taskId string) (int, []Snapshot, error)

type Task

// Task is one URL to be fetched, together with its crawl position, status,
// scheduling timestamps and a summary of its most recent snapshot.
type Task struct {
	// ID is tagged as the gorm primary key and as the "id" index.
	ID string `gorm:"not null;unique;primary_key" json:"id" index:"id"`
	// Url may not be cleaned and may be missing the host part; Reference is
	// needed to reconstruct the complete URL.
	Url         string    `storm:"index" json:"url,omitempty" gorm:"type:varchar(500)"`
	// Reference is serialized under the JSON key "reference_url".
	Reference   string    `json:"reference_url,omitempty"`
	Depth       int       `storm:"index" json:"depth"`
	Breadth     int       `storm:"index" json:"breadth"`
	Host        string    `gorm:"index" json:"host"`
	Schema      string    `json:"schema,omitempty"`
	OriginalUrl string    `json:"original_url,omitempty"`
	// Status is an int; presumably one of the Task* status constants
	// (TaskCreated, TaskFailed, ...) — TODO confirm.
	Status      int       `gorm:"index" json:"status"`
	Message     string    `json:"message,omitempty"`
	// NOTE(review): omitempty has no effect on non-pointer time.Time values
	// in encoding/json, so every timestamp below is always serialized.
	Created     time.Time `gorm:"index" json:"created,omitempty"`
	Updated     time.Time `gorm:"index" json:"updated,omitempty"`
	LastFetch   time.Time `gorm:"index" json:"last_fetch,omitempty"`
	LastCheck   time.Time `gorm:"index" json:"last_check,omitempty"`
	NextCheck   time.Time `gorm:"index" json:"next_check,omitempty"`

	// Summary of a snapshot; presumably the most recent one — TODO confirm.
	SnapshotVersion  int       `json:"snapshot_version,omitempty"`
	SnapshotID       string    `json:"snapshot_id,omitempty"`
	SnapshotHash     string    `json:"snapshot_hash,omitempty"`
	SnapshotSimHash  string    `json:"snapshot_simhash,omitempty"`
	SnapshotCreated  time.Time `json:"snapshot_created,omitempty"`
	LastScreenshotID string    `json:"last_screenshot_id,omitempty"`

	// NOTE(review): the JSON key is spelled "pipline_config_id" (missing
	// "e"). It is a wire name, so renaming it would break existing payloads.
	PipelineConfigID string      `json:"pipline_config_id,omitempty"`
	HostConfig       *HostConfig `json:"host_config,omitempty"`

	// transient properties: excluded from JSON via the "-" tag
	Snapshots     []Snapshot `json:"-"`
	SnapshotCount int        `json:"-"`
}

func GetFailedTasks added in v0.10.0

func GetFailedTasks(offset time.Time) (int, []Task, error)

func GetPendingNewFetchTasks

func GetPendingNewFetchTasks(offset time.Time) (int, []Task, error)

func GetPendingUpdateFetchTasks

func GetPendingUpdateFetchTasks(offset time.Time) (int, []Task, error)

func GetTask

func GetTask(id string) (Task, error)

func GetTaskByField

func GetTaskByField(k, v string) ([]Task, error)

func GetTaskList

func GetTaskList(from, size int, host string, status int) (int, []Task, error)

func NewTask added in v0.10.0

func NewTask(url, ref string, depth int, breadth int) *Task

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL