parser

package
v0.0.0-...-ddbee9b Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 2, 2017 License: GPL-3.0 Imports: 16 Imported by: 0

Documentation

Overview

patterns

patterns

patterns

patterns

patterns

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func ByteToKind

func ByteToKind(t reflect.Kind, data []byte) (interface{}, error)

func Render

func Render(w io.Writer, n *html.Node) error

Types

type CompiledField

type CompiledField struct {
	// contains filtered or unexported fields
}

func (*CompiledField) RelativePath

func (f *CompiledField) RelativePath() string

RelativePath returns the field's relative path, e.g. "Root.SubField1.SubField2...".

func (*CompiledField) Retrieve

func (f *CompiledField) Retrieve(root *html.Node) (result interface{})

type CompiledMap

type CompiledMap struct {
	// contains filtered or unexported fields
}

func (*CompiledMap) ApplyHtml

func (p *CompiledMap) ApplyHtml(url string, context *html.Node) interface{}

type CompiledRegexRules

type CompiledRegexRules struct {
	Submatch *regexp.Regexp
	Include  []*regexp.Regexp
	Exclude  []*regexp.Regexp
	Remove   []*regexp.Regexp
}

func (*CompiledRegexRules) Clean

func (p *CompiledRegexRules) Clean(s []byte) []byte

func (*CompiledRegexRules) FindMultiple

func (p *CompiledRegexRules) FindMultiple(s []byte) [][]byte

func (*CompiledRegexRules) FindOne

func (p *CompiledRegexRules) FindOne(s []byte) []byte

func (*CompiledRegexRules) Test

func (p *CompiledRegexRules) Test(s []byte) bool

type CompiledXpathRules

type CompiledXpathRules struct {
	Include []*xpath.Expr
	Exclude []*xpath.Expr
	Remove  []*xpath.Expr
}

func (*CompiledXpathRules) Clean

func (p *CompiledXpathRules) Clean(s *html.Node) *html.Node

func (*CompiledXpathRules) Test

Test reports whether the input complies with the defined rules.

type Field

type Field struct {
	// field title
	Title string `xml:"title,attr"`

	// field type: int, string, float64, struct, html
	Type string `xml:"type,attr"`

	// xpath expression to find field
	Path string

	// data filter or transformation based on regex expressions; see rules.go
	Data *RegexRules

	// data filter based on xpath expressions
	XData *XpathRules

	// optional field
	Optional bool `xml:"optional,attr,omitempty"`

	// deprecated
	DontStore bool `xml:"dontstore,attr,omitempty"`

	//
	Multiple bool `xml:"multiple,attr,omitempty"`

	// only unique fields
	Unique bool `xml:"unique,attr,omitempty"`

	// preserve HTML attributes (only works if field type="html")
	Attr bool `xml:"attr,attr,omitempty"`

	// sub-fields declaration
	Field []*Field
}

Field struct declaration: <Field>...</Field>

func (*Field) Compile

func (f *Field) Compile() (*CompiledField, error)

func (*Field) FindChildField

func (f *Field) FindChildField(name string) *Field

func (*Field) FindField

func (f *Field) FindField(addr string) *Field

func (*Field) Serialize

func (f *Field) Serialize() []*Field

type Map

type Map struct {
	//	Title   string `xml:"title,attr"`
	Storage string `xml:"storage,attr,omitempty"`
	Field   *Field
	URL     *RegexRules
	Mime    string `xml:"mime,attr"`
}

func (*Map) Compile

func (p *Map) Compile() (*CompiledMap, error)

func (*Map) MarshalXml

func (f *Map) MarshalXml() ([]byte, error)

func (*Map) MarshalYaml

func (f *Map) MarshalYaml() ([]byte, error)

func (*Map) UnmarshalXml

func (f *Map) UnmarshalXml(data []byte) error

func (*Map) UnmarshalYaml

func (f *Map) UnmarshalYaml(data []byte) error

type PatternNode

type PatternNode map[string]interface{}

func (*PatternNode) ApplyPatterns

func (pn *PatternNode) ApplyPatterns(url string, data *html.Node) map[string]interface{}

ApplyPatterns applies the XML patterns to the input (URL "address" and HTML "content") and returns a map with the result data.

func (*PatternNode) ListPatterns

func (pn *PatternNode) ListPatterns() []string

type Patterns

type Patterns struct {
	Tree *PatternNode
	Log  *log.Logger
}

func NewPatterns

func NewPatterns(log *log.Logger) *Patterns

func (*Patterns) Apply

func (p *Patterns) Apply(url string, content io.Reader) (map[string]interface{}, error)

func (*Patterns) Load

func (p *Patterns) Load(el *PatternNode, path string) error

func (*Patterns) LoadTree

func (p *Patterns) LoadTree(path string) error

func (*Patterns) LoadXml

func (p *Patterns) LoadXml(el *PatternNode, data []byte, itemName string) error

func (*Patterns) LoadYaml

func (p *Patterns) LoadYaml(el *PatternNode, data []byte, itemName string) error

type RegexRules

type RegexRules struct {
	Submatch string
	Include  string
	Exclude  string
	Remove   string
}

func (*RegexRules) Compile

func (p *RegexRules) Compile() (*CompiledRegexRules, error)

type Type

type Type struct {
	// contains filtered or unexported fields
}

func CompileType

func CompileType(typeName string) (*Type, error)

type XpathRules

type XpathRules struct {
	Include string
	Exclude string
	Remove  string
}

func (*XpathRules) Compile

func (p *XpathRules) Compile() (*CompiledXpathRules, error)

Compile brings the rules (given as HTML CDATA) to compiled form.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL